34#include <unordered_map>
39#include "VapourSynth4.h"
/// How one output plane of a filter is produced.
enum class PlaneOp : std::uint8_t {
    PoProcess,  ///< The plane is computed by running the plane's expression.
    PoCopy      ///< The plane is taken from the first input clip unchanged.
};
// Members of the shared filter-instance data (presumably BaseExprData; the
// struct header is not part of this excerpt).
// Input clip nodes; released with freeNode() when the filter is freed or its
// creation fails.
64 std::vector<VSNode*> nodes;
// Set from the optional "boundary" argument (non-zero enables it).
67 bool mirror_boundary =
false;
// Copied from the optional "dump_ir" argument when it is supplied.
68 std::string dump_ir_path;
// (clip index, property name) pairs whose values are read from the source
// frames for every output frame, in the order they were first encountered.
71 std::vector<std::pair<int, std::string>> required_props;
// Integer index assigned to each (clip index, property name) pair the first
// time it is seen in an expression.
72 std::map<std::pair<int, std::string>,
int> prop_map;
// Instance data of the Expr filter. The three-element arrays are indexed by
// plane number.
75struct ExprData : BaseExprData {
// Whether each plane is computed from its expression or copied.
76 std::array<PlaneOp, 3> plane_op = {};
// Compiled function per plane; exprGetFrame fills an entry the first time it
// finds its func_ptr to be null.
77 std::array<CompiledFunction, 3> compiled;
// Token stream of each plane's expression.
78 std::array<std::vector<Token>, 3> tokens;
// Analysis manager created for each plane's tokens at filter creation.
79 std::array<std::unique_ptr<analysis::AnalysisManager>, 3> analysis_managers;
// Instance data of the SingleExpr filter (one expression for the whole frame).
84struct SingleExprData : BaseExprData {
// Frame properties the expression stores, each with its requested write type.
86 std::vector<std::pair<std::string, PropWriteType>> output_props;
// Property name -> index of that property in output_props.
87 std::map<std::string, int> output_prop_map;
// Token stream of the expression.
88 std::vector<Token> tokens;
// Analysis manager created for the tokens at filter creation.
89 std::unique_ptr<analysis::AnalysisManager> analysis_manager;
// Scratch state used while SingleExpr processes a frame.
92struct SingleExprFrameData {
94 std::vector<float> buffer;
// Named buffers: emptied at the start of each SingleExpr frame and grown on
// demand by the buffer helper that looks them up by name.
96 std::map<std::string, DynamicArray> dynamic_arrays;
// Thread-local frame scratch data. The variable name is cut off in this
// excerpt; presumably it is the g_frame_data object used by later code.
99thread_local SingleExprFrameData
// Remembers the (tile_x, tile_y) pair selected by auto-tuning, keyed by the
// string produced by generate_cache_key.
102std::unordered_map<std::string, std::pair<int, int>> expr_autotune_cache;
// Held while expr_autotune_cache is searched or updated.
103std::mutex expr_autotune_cache_mutex;
// Loads the "clips" argument into d->nodes and validates the inputs.
//
// Throws std::runtime_error when no clip is supplied, when a clip does not
// have a constant video format, or (only when check_dimensions is true) when
// a clip's width or height differs from the first clip's.
// The nodes acquired here stay in d->nodes; the callers' error paths and the
// free callbacks release them.
105template <
bool check_dimensions>
106void validate_and_init_clips(BaseExprData* d,
const VSMap* in,
107 const VSAPI* vsapi) {
109 d->num_inputs = vsapi->mapNumElements(in,
"clips");
110 if (d->num_inputs == 0) {
111 throw std::runtime_error(
"At least one clip must be provided.");
114 d->nodes.resize(d->num_inputs);
115 for (
int i = 0; i < d->num_inputs; ++i) {
116 d->nodes[i] = vsapi->mapGetNode(in,
"clips", i, &err);
// Collect each input's video info and reject variable-format clips.
119 std::vector<const VSVideoInfo*> vi(d->num_inputs);
120 for (
int i = 0; i < d->num_inputs; ++i) {
121 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
122 if (!vsh::isConstantVideoFormat(vi[i])) {
123 throw std::runtime_error(
124 "Only constant format clips are supported.");
// The dimension check is compiled in only for callers that request it.
128 if constexpr (check_dimensions) {
129 for (
int i = 1; i < d->num_inputs; ++i) {
130 if (vi[i]->width != vi[0]->width ||
131 vi[i]->height != vi[0]->height) {
132 throw std::runtime_error(
133 "All clips must have the same dimensions.");
// Applies the optional "format" argument to d->vi.format.
//
// The requested format contributes only its sample type and bit depth; color
// family and subsampling are kept from the current d->vi.format.
// Throws std::runtime_error when the plane counts differ or when the combined
// format cannot be queried.
141void parse_format_param(BaseExprData* d,
const VSMap* in,
const VSAPI* vsapi,
144 const int format_id =
145 static_cast<int>(vsapi->mapGetInt(in,
"format", 0, &err));
147 VSVideoFormat temp_format;
148 if (vsapi->getVideoFormatByID(&temp_format, format_id, core) != 0) {
149 if (d->vi.format.numPlanes != temp_format.numPlanes) {
150 throw std::runtime_error(
"The number of planes in the "
151 "inputs and output must match.");
// Build the output format from the existing color family/subsampling and the
// requested sample type/bit depth.
153 VSVideoFormat new_format;
154 if (vsapi->queryVideoFormat(&new_format, d->vi.format.colorFamily,
155 temp_format.sampleType,
156 temp_format.bitsPerSample,
157 d->vi.format.subSamplingW,
158 d->vi.format.subSamplingH, core) != 0) {
159 d->vi.format = new_format;
161 throw std::runtime_error(
"Failed to query new format.");
// Reads the "dump_ir", "opt_level" and "approx_math" arguments into d.
//
// Throws std::runtime_error when opt_level is not greater than 0 or when
// approx_math is outside the range 0..2. The handling of absent arguments
// (defaults) is not visible in this excerpt.
167void parse_common_params(BaseExprData* d,
const VSMap* in,
const VSAPI* vsapi) {
170 const char* dump_path = vsapi->mapGetData(in,
"dump_ir", 0, &err);
// Only store the path when the argument was actually supplied.
171 if ((err == 0) && (dump_path !=
nullptr)) {
172 d->dump_ir_path = dump_path;
175 d->opt_level =
static_cast<int>(vsapi->mapGetInt(in,
"opt_level", 0, &err));
179 if (d->opt_level <= 0) {
180 throw std::runtime_error(
"opt_level must be greater than 0.");
184 static_cast<int>(vsapi->mapGetInt(in,
"approx_math", 0, &err));
188 if (d->approx_math < 0 || d->approx_math > 2) {
189 throw std::runtime_error(
190 "approx_math must be 0 (disabled), 1 (enabled), or 2 (auto).");
// Reads the "tile_x" and "tile_y" arguments of the Expr filter into d.
//
// Values below -1 are rejected with std::runtime_error. A value of -1 is what
// exprGetFrame treats as "choose this tile size automatically".
194void parse_expr_tiling_params(ExprData* d,
const VSMap* in,
195 const VSAPI* vsapi) {
197 d->tile_x =
static_cast<int>(vsapi->mapGetInt(in,
"tile_x", 0, &err));
202 d->tile_y =
static_cast<int>(vsapi->mapGetInt(in,
"tile_y", 0, &err));
207 if (d->tile_x < -1) {
208 throw std::runtime_error(
"tile_x must be -1 or >= 0.");
210 if (d->tile_y < -1) {
211 throw std::runtime_error(
"tile_y must be -1 or >= 0.");
// Fills props for one output frame.
//
// props[0] receives the frame number n; props[i + 1] receives the value of
// required_props[i], read from the source frame of the referenced clip and
// converted to float. The caller must size props to at least
// 1 + required_props.size().
// NOTE(review): src_frames is indexed with the stored clip index without a
// bounds check here; presumably the index was validated when the expression
// was tokenized — confirm.
215void read_frame_properties(
216 std::vector<float>& props,
const std::vector<const VSFrame*>& src_frames,
217 const std::vector<std::pair<int, std::string>>& required_props,
int n,
218 const VSAPI* vsapi) {
220 props[0] =
static_cast<float>(n);
222 for (
size_t i = 0; i < required_props.size(); ++i) {
223 const auto& prop_info = required_props[i];
224 int clip_idx = prop_info.first;
225 const std::string& prop_name = prop_info.second;
// Slot 0 is the frame number, so property i lives at index i + 1.
226 int prop_array_idx =
static_cast<int>(i) + 1;
228 const VSMap* props_map =
229 vsapi->getFramePropertiesRO(src_frames[clip_idx]);
// The conversion depends on the stored type of the property.
231 int type = vsapi->mapGetType(props_map, prop_name.c_str());
234 props[prop_array_idx] =
static_cast<float>(
235 vsapi->mapGetInt(props_map, prop_name.c_str(), 0, &err));
236 }
else if (type == ptFloat) {
237 props[prop_array_idx] =
static_cast<float>(
238 vsapi->mapGetFloat(props_map, prop_name.c_str(), 0, &err));
239 }
else if (type == ptData) {
// For data properties the first byte of the data is used as the value.
240 if (vsapi->mapGetDataSize(props_map, prop_name.c_str(), 0, &err) >
243 props[prop_array_idx] =
static_cast<float>(
244 *vsapi->mapGetData(props_map, prop_name.c_str(), 0, &err));
// Shared body of the filter free callbacks.
// Takes ownership of the instance data as a T (destroyed when d goes out of
// scope) and releases every input node it holds.
260void genericFree(
void* instanceData, [[maybe_unused]] VSCore* core,
261 const VSAPI* vsapi) {
263 std::unique_ptr<T> d(
static_cast<T*
>(instanceData));
264 for (
auto* node : d->nodes) {
265 vsapi->freeNode(node);
// Builds a text key from the inputs of code generation: the expression text,
// boundary mode, output format name, plane size, each input's format name and
// size, the property index map, output property names, tile sizes,
// optimisation level and approx_math mode.
//
// Throws std::runtime_error when a video format name cannot be obtained.
269std::string generate_cache_key(
270 const std::string& expr,
const VSVideoInfo* vo,
const VSAPI* vsapi,
271 const std::vector<const VSVideoInfo*>& vi,
bool mirror,
272 const std::map<std::pair<int, std::string>,
int>& prop_map,
int plane_width,
273 int plane_height,
const std::vector<std::string>& output_props = {},
274 int tile_x = 0,
int tile_y = 0,
int opt_level = 0,
int approx_math = 0) {
// Helper: human-readable name of a video format, as reported by the API.
275 auto get_vf_name = [&](
const VSVideoFormat* vf) {
278 if (!vsapi->getVideoFormatName(vf, vf_name_buffer.data())) {
279 throw std::runtime_error(
"Failed to get video format name");
281 return std::string(vf_name_buffer.data());
284 std::format(
"expr={}|mirror={}|out={}|w={}|h={}", expr, mirror,
285 get_vf_name(&vo->format), plane_width, plane_height);
// One segment per input clip.
287 for (
size_t i = 0; i < vi.size(); ++i) {
288 result += std::format(
"|in{}={}|in{}_w={}|in{}_h={}", i,
289 get_vf_name(&vi[i]->format), i, vi[i]->width, i,
// prop_map is an ordered map, so these segments come out in key order.
293 for (
const auto& [key, val] : prop_map) {
294 result += std::format(
"|prop{}={}.{}", val, key.first, key.second);
297 for (
const auto& prop : output_props) {
298 result += std::format(
"|out_prop={}", prop);
301 result += std::format(
"|tile_x={}|tile_y={}", tile_x, tile_y);
303 std::format(
"|opt_level={}|approx_math={}", opt_level, approx_math);
// Frame callback of the Expr filter.
//
// arInitial: requests frame n from every input node.
// arAllFramesReady: fetches the inputs, creates the output frame, and for
// every plane marked PoProcess runs that plane's compiled function. A plane
// whose function pointer is still null is compiled first; when tile_x and/or
// tile_y is -1 the tile size is picked by timing a set of candidates, and the
// winning pair is remembered in expr_autotune_cache.
//
// NOTE(review): d->compiled.at(plane) is assigned below with no lock visible
// in this excerpt, while the filter is registered as fmParallel; confirm that
// concurrent first frames cannot race on it.
311 exprGetFrame(
int n,
int activationReason,
void* instanceData,
312 [[maybe_unused]]
void** frameData, VSFrameContext* frameCtx,
313 VSCore* core,
const VSAPI* vsapi) {
315 auto* d =
static_cast<ExprData*
>(instanceData);
317 if (activationReason == arInitial) {
318 for (
int i = 0; i < d->num_inputs; ++i) {
319 vsapi->requestFrameFilter(n, d->nodes[i], frameCtx);
321 }
else if (activationReason == arAllFramesReady) {
322 std::vector<const VSFrame*> src_frames(d->num_inputs);
323 for (
int i = 0; i < d->num_inputs; ++i) {
324 src_frames[i] = vsapi->getFrameFilter(n, d->nodes[i], frameCtx);
// Planes marked PoCopy name the first input as their plane source; processed
// planes pass nullptr.
327 std::array<const VSFrame*, 3> plane_src = {
328 d->plane_op.at(0) == PlaneOp::PoCopy ? src_frames[0] :
nullptr,
329 d->plane_op.at(1) == PlaneOp::PoCopy ? src_frames[0] :
nullptr,
330 d->plane_op.at(2) == PlaneOp::PoCopy ? src_frames[0] :
nullptr};
331 std::array<int, 3> planes = {0, 1, 2};
332 VSFrame* dst_frame = vsapi->newVideoFrame2(
333 &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
334 planes.data(), src_frames[0], core);
// Index 0 of rwptrs/strides is the destination plane; index i + 1 is input i.
336 std::vector<uint8_t*> rwptrs(d->num_inputs + 1);
337 std::vector<int> strides(d->num_inputs + 1);
// props[0] is the frame number, followed by one slot per required property.
338 std::vector<float> props(1 + d->required_props.size());
340 read_frame_properties(props, src_frames, d->required_props, n, vsapi);
342 for (
int plane = 0; plane < d->vi.format.numPlanes; ++plane) {
343 if (d->plane_op.at(plane) == PlaneOp::PoProcess) {
344 rwptrs[0] = vsapi->getWritePtr(dst_frame, plane);
346 static_cast<int>(vsapi->getStride(dst_frame, plane));
347 for (
int i = 0; i < d->num_inputs; ++i) {
350 uint8_t*
>(vsapi->getReadPtr(src_frames[i], plane));
351 strides[i + 1] =
static_cast<int>(
352 vsapi->getStride(src_frames[i], plane));
// Compile on first use of this plane.
355 if (d->compiled.at(plane).func_ptr ==
nullptr) {
356 int width = vsapi->getFrameWidth(dst_frame, plane);
357 int height = vsapi->getFrameHeight(dst_frame, plane);
359 std::vector<const VSVideoInfo*> vi(d->num_inputs);
360 for (
int i = 0; i < d->num_inputs; ++i) {
361 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
// Rebuild the expression text from the token texts; it becomes part of the
// cache key.
364 std::string expr_str;
365 for (
const auto& token : d->tokens.at(plane)) {
366 if (!expr_str.empty()) {
369 expr_str += token.text;
// Helper: obtains the compiled function for a concrete tile size. The key and
// the key-derived function name are built from the plane size and settings.
372 auto get_or_compile = [&](
int resolved_tile_x,
373 int resolved_tile_y) {
374 const std::string key = generate_cache_key(
375 expr_str, &d->vi, vsapi, vi, d->mirror_boundary,
376 d->prop_map, width, height, {}, resolved_tile_x,
377 resolved_tile_y, d->opt_level, d->approx_math);
381 size_t key_hash = std::hash<std::string>{}(key);
382 std::string func_name = std::format(
383 "process_plane_{}_{}", plane, key_hash);
387 *d->analysis_managers.at(plane));
389 std::vector<Token>(d->tokens.at(plane)),
390 &d->vi, vi, width, height,
391 d->mirror_boundary, d->dump_ir_path,
392 d->prop_map, func_name, d->opt_level,
393 d->approx_math, results, resolved_tile_x,
// NOTE(review): the frees below appear to belong to a failure path whose
// surrounding lines are not part of this excerpt.
397 for (
const auto& frame : src_frames) {
398 vsapi->freeFrame(frame);
400 vsapi->freeFrame(dst_frame);
// -1 means the corresponding tile size is to be chosen automatically.
407 const bool auto_tile_x = d->tile_x == -1;
408 const bool auto_tile_y = d->tile_y == -1;
410 if (!auto_tile_x && !auto_tile_y) {
411 d->compiled.at(plane) =
412 get_or_compile(d->tile_x, d->tile_y);
// The autotune key is built from the requested tile values (including any
// -1), not from the resolved ones.
414 const std::string autotune_key = generate_cache_key(
415 expr_str, &d->vi, vsapi, vi, d->mirror_boundary,
416 d->prop_map, width, height, {}, d->tile_x,
417 d->tile_y, d->opt_level, d->approx_math);
421 bool has_autotuned =
false;
// Reuse an earlier tuning result for the same key when there is one.
423 std::lock_guard<std::mutex> lock(
424 expr_autotune_cache_mutex);
425 auto it = expr_autotune_cache.find(autotune_key);
426 if (it != expr_autotune_cache.end()) {
427 best_tile_x = it->second.first;
428 best_tile_y = it->second.second;
429 has_autotuned =
true;
433 if (!has_autotuned) {
434 constexpr std::array<int, 8> AUTO_TILE_CANDIDATES = {
// Candidate list: every pair when both axes are automatic, otherwise only the
// automatic axis varies and the other keeps the requested value.
438 std::vector<std::pair<int, int>> candidates;
439 if (auto_tile_x && auto_tile_y) {
440 candidates.reserve(AUTO_TILE_CANDIDATES.size() *
441 AUTO_TILE_CANDIDATES.size());
442 for (
int tx : AUTO_TILE_CANDIDATES) {
443 for (
int ty : AUTO_TILE_CANDIDATES) {
444 candidates.emplace_back(tx, ty);
447 }
else if (auto_tile_x) {
448 candidates.reserve(AUTO_TILE_CANDIDATES.size());
449 for (
int tx : AUTO_TILE_CANDIDATES) {
450 candidates.emplace_back(tx, d->tile_y);
453 candidates.reserve(AUTO_TILE_CANDIDATES.size());
454 for (
int ty : AUTO_TILE_CANDIDATES) {
455 candidates.emplace_back(d->tile_x, ty);
459 double best_time_ns =
460 std::numeric_limits<double>::max();
// Time each candidate on the real frame buffers and keep the fastest.
463 for (
const auto& [candidate_tile_x,
464 candidate_tile_y] : candidates) {
466 candidate_tile_x, candidate_tile_y);
// NOTE(review): this call sits before the timed loop; it looks like an
// untimed warm-up run — confirm.
469 candidate.
func_ptr(
nullptr, rwptrs.data(),
473 constexpr int MEASURED_RUNS =
476 std::chrono::steady_clock::now();
477 for (
int run = 0; run < MEASURED_RUNS; ++run) {
478 candidate.
func_ptr(
nullptr, rwptrs.data(),
483 std::chrono::steady_clock::now();
485 const double avg_time_ns =
487 std::chrono::duration_cast<
488 std::chrono::nanoseconds>(end -
491 static_cast<double>(MEASURED_RUNS);
492 if (avg_time_ns < best_time_ns) {
493 best_time_ns = avg_time_ns;
494 best_tile_x = candidate_tile_x;
495 best_tile_y = candidate_tile_y;
496 best_compiled = candidate;
500 if (best_compiled.
func_ptr ==
nullptr) {
501 throw std::runtime_error(
502 "Auto tile benchmark failed to select a "
// Publish the winning tile size for later lookups with the same key.
507 std::lock_guard<std::mutex> lock(
508 expr_autotune_cache_mutex);
509 expr_autotune_cache[autotune_key] = {
510 best_tile_x, best_tile_y};
512 d->compiled.at(plane) = best_compiled;
// A cached tuning result exists: compile directly with the remembered tiles.
514 d->compiled.at(plane) =
515 get_or_compile(best_tile_x, best_tile_y);
// Run the plane's compiled function on the prepared pointers.
520 d->compiled.at(plane).func_ptr(
nullptr, rwptrs.data(),
521 strides.data(), props.data());
525 for (
const auto& frame : src_frames) {
526 vsapi->freeFrame(frame);
// Free callback of the Expr filter: delegates to genericFree with ExprData as
// the instance type.
536exprFree(
void* instanceData, [[maybe_unused]] VSCore* core,
537 const VSAPI* vsapi) {
539 genericFree<ExprData>(instanceData, core, vsapi);
// Creation callback of the Expr filter.
//
// Validates the clips (equal dimensions required), applies the optional
// output format, reads the per-plane expressions, records which frame
// properties they access, creates an analysis manager per processed plane,
// reads the common and tiling options and finally registers the filter.
// Any std::exception raised during setup frees the acquired nodes and is
// reported through mapSetError with an "Expr: " prefix.
544exprCreate(
const VSMap* in, VSMap* out, [[maybe_unused]]
void* userData,
545 VSCore* core,
const VSAPI* vsapi) {
547 auto d = std::make_unique<ExprData>();
551 validate_and_init_clips<true>(d.get(), in, vsapi);
552 parse_format_param(d.get(), in, vsapi, core);
554 d->mirror_boundary = vsapi->mapGetInt(in,
"boundary", 0, &err) != 0;
556 const int nexpr = vsapi->mapNumElements(in,
"expr");
558 throw std::runtime_error(
559 "At least one expression must be provided.");
562 bool use_infix = vsapi->mapGetInt(in,
"infix", 0, &err) != 0;
// NOTE(review): expr_strs has three slots and is indexed with .at(i) for
// every supplied expression; more than three expressions would throw
// std::out_of_range unless rejected by lines not shown here — confirm.
564 std::array<std::string, 3> expr_strs;
565 for (
int i = 0; i < nexpr; ++i) {
566 std::string input_expr = vsapi->mapGetData(in,
"expr", i, &err);
// Infix expressions get a table of predefined macros describing the output
// and every input clip.
567 if (use_infix && !input_expr.empty()) {
568 std::map<std::string, std::string> macros;
569 macros[
"__EXPR__"] =
"";
570 macros[
"__NUM_PLANES__"] =
571 std::to_string(d->vi.format.numPlanes);
572 macros[
"__WIDTH__"] = std::to_string(d->vi.width);
573 macros[
"__HEIGHT__"] = std::to_string(d->vi.height);
574 macros[
"__INPUT_NUM__"] = std::to_string(d->num_inputs);
575 macros[
"__OUTPUT_BITDEPTH__"] =
576 std::to_string(d->vi.format.bitsPerSample);
577 macros[
"__OUTPUT_COLORFAMILY__"] =
578 std::to_string(d->vi.format.colorFamily);
579 macros[
"__SUBSAMPLE_W__"] =
580 std::to_string(d->vi.format.subSamplingW);
581 macros[
"__SUBSAMPLE_H__"] =
582 std::to_string(d->vi.format.subSamplingH);
// The plane number is the index of the expression being processed.
583 macros[
"__PLANE_NO__"] = std::to_string(i);
584 macros[
"__OUTPUT_SAMPLETYPE__"] = std::to_string(
585 (d->vi.format.sampleType == stFloat) ? 1 : 0);
587 for (
int j = 0; j < d->num_inputs; ++j) {
588 const VSVideoInfo* input_vi =
589 vsapi->getVideoInfo(d->nodes[j]);
590 macros[std::format(
"__INPUT_BITDEPTH_{}__", j)] =
591 std::to_string(input_vi->format.bitsPerSample);
592 macros[std::format(
"__INPUT_COLORFAMILY_{}__", j)] =
593 std::to_string(input_vi->format.colorFamily);
594 macros[std::format(
"__INPUT_NUM_PLANES_{}__", j)] =
595 std::to_string(input_vi->format.numPlanes);
596 macros[std::format(
"__INPUT_SAMPLETYPE_{}__", j)] =
598 (input_vi->format.sampleType == stFloat) ? 1 : 0);
605 expr_strs.at(i) = input_expr;
// Planes without their own expression reuse the last one supplied.
608 for (
int i = nexpr; i < d->vi.format.numPlanes; ++i) {
609 expr_strs.at(i) = expr_strs.at(nexpr - 1);
// An empty expression means "copy the plane"; anything else is processed.
612 for (
int i = 0; i < d->vi.format.numPlanes; ++i) {
613 if (expr_strs.at(i).empty()) {
614 d->plane_op.at(i) = PlaneOp::PoCopy;
617 d->plane_op.at(i) = PlaneOp::PoProcess;
// Register every accessed frame property once. Indices start at 1 because
// slot 0 of the per-frame props array holds the frame number.
621 for (
const auto& token : d->tokens.at(i)) {
624 const auto& payload =
625 std::get<TokenPayloadPropAccess>(token.payload);
627 std::make_pair(payload.clip_idx, payload.prop_name);
628 if (!d->prop_map.contains(key)) {
629 d->prop_map[key] =
static_cast<int>(
630 1 + d->required_props
632 d->required_props.push_back(key);
637 auto analyser = std::make_unique<analysis::AnalysisManager>(
638 d->tokens.at(i), d->mirror_boundary);
641 d->analysis_managers.at(i) = std::move(analyser);
644 parse_common_params(d.get(), in, vsapi);
645 parse_expr_tiling_params(d.get(), in, vsapi);
647 }
catch (
const std::exception& e) {
// Setup failed: release the nodes acquired so far and report the error.
648 for (
auto* node : d->nodes) {
649 if (node !=
nullptr) {
650 vsapi->freeNode(node);
653 vsapi->mapSetError(out, std::format(
"Expr: {}", e.what()).c_str());
// Every input is declared as a strict-spatial dependency.
657 std::vector<VSFilterDependency> deps;
658 deps.reserve(d->nodes.size());
659 for (
auto* node : d->nodes) {
660 deps.push_back({node, rpStrictSpatial});
// Take the address before the call: d.release() appears in the same argument
// list, so &d->vi must not be evaluated there.
663 VSVideoInfo* vi_ptr = &d->vi;
665 vsapi->createVideoFilter(out,
"Expr", vi_ptr, exprGetFrame, exprFree,
666 fmParallel, deps.data(),
667 static_cast<int>(deps.size()), d.release(), core);
// Free callback of the SingleExpr filter: delegates to genericFree with
// SingleExprData as the instance type.
672singleExprFree(
void* instanceData, [[maybe_unused]] VSCore* core,
673 const VSAPI* vsapi) {
675 genericFree<SingleExprData>(instanceData, core, vsapi);
// Frame callback of the SingleExpr filter.
//
// arInitial: requests frame n from every input node.
// arAllFramesReady: clears the thread-local dynamic arrays, builds the output
// frame, prepares pointer/stride tables for every plane of the output and of
// each input, runs the compiled function (compiling it on first use) and then
// writes the expression's output properties to the output frame.
//
// NOTE(review): d->compiled is filled lazily here with no lock visible in
// this excerpt, while the filter is registered as fmParallel; confirm
// thread-safety.
681 singleExprGetFrame(
int n,
int activationReason,
void* instanceData,
682 [[maybe_unused]]
void** frameData,
683 VSFrameContext* frameCtx, VSCore* core,
684 const VSAPI* vsapi) {
686 auto* d =
static_cast<SingleExprData*
>(instanceData);
688 if (activationReason == arInitial) {
689 for (
int i = 0; i < d->num_inputs; ++i) {
690 vsapi->requestFrameFilter(n, d->nodes[i], frameCtx);
692 }
else if (activationReason == arAllFramesReady) {
// Start every frame with no named buffers left over from a previous one.
693 g_frame_data.dynamic_arrays.clear();
695 std::vector<const VSFrame*> src_frames(d->num_inputs);
696 for (
int i = 0; i < d->num_inputs; ++i) {
697 src_frames[i] = vsapi->getFrameFilter(n, d->nodes[i], frameCtx);
700 std::array<const VSFrame*, 3> plane_src = {src_frames[0], src_frames[0],
702 std::array<int, 3> planes = {0, 1, 2};
703 VSFrame* dst_frame = vsapi->newVideoFrame2(
704 &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
705 planes.data(), src_frames[0], core);
// Tables are laid out as [frame][plane] with index frame * num_planes + plane;
// frame 0 is the destination and frame i (i >= 1) is input i - 1.
707 int num_planes = d->vi.format.numPlanes;
708 std::vector<uint8_t*> rwptrs((d->num_inputs + 1) * num_planes);
709 std::vector<int> strides((d->num_inputs + 1) * num_planes);
// props layout: frame number, then the required input properties, then one
// slot per output property.
710 std::vector<float> props(1 + d->required_props.size() +
711 d->output_props.size());
713 read_frame_properties(props, src_frames, d->required_props, n, vsapi);
715 for (
size_t i = 0; i < d->output_props.size(); ++i) {
716 props[1 + d->required_props.size() + i] =
720 for (
int i = 0; i <= d->num_inputs; ++i) {
721 for (
int p = 0; p < num_planes; ++p) {
722 rwptrs[(i * num_planes) + p] =
724 ? vsapi->getWritePtr(dst_frame, p)
727 vsapi->getReadPtr(src_frames[i - 1], p));
728 strides[(i * num_planes) + p] =
static_cast<int>(
729 (i == 0) ? vsapi->getStride(dst_frame, p)
730 : vsapi->getStride(src_frames[i - 1], p));
// Compile on first use.
734 if (d->compiled.func_ptr ==
nullptr) {
735 std::vector<const VSVideoInfo*> vi(d->num_inputs);
736 for (
int i = 0; i < d->num_inputs; ++i) {
737 vi[i] = vsapi->getVideoInfo(d->nodes[i]);
// Rebuild the expression text from the token texts for the cache key.
740 std::string expr_str;
741 for (
const auto& token : d->tokens) {
742 if (!expr_str.empty()) {
745 expr_str += token.text;
748 std::vector<std::string> output_prop_names;
749 output_prop_names.reserve(d->output_props.size());
750 for (
const auto& p : d->output_props) {
751 output_prop_names.push_back(p.first);
// Tile sizes are always passed as 0 for SingleExpr.
754 const std::string key = generate_cache_key(
755 expr_str, &d->vi, vsapi, vi, d->mirror_boundary, d->prop_map,
756 d->vi.width, d->vi.height, output_prop_names, 0, 0,
757 d->opt_level, d->approx_math);
761 size_t key_hash = std::hash<std::string>{}(key);
762 std::string func_name =
763 std::format(
"process_single_expr_{}", key_hash);
767 *d->analysis_manager);
769 std::vector<Token>(d->tokens), &d->vi, vi, d->vi.width,
770 d->vi.height, d->mirror_boundary, d->dump_ir_path,
771 d->prop_map, func_name, d->opt_level, d->approx_math,
774 }
catch (
const std::exception& e) {
// Compilation failed: release every frame obtained for this request.
775 for (
const auto& frame : src_frames) {
776 vsapi->freeFrame(frame);
778 vsapi->freeFrame(dst_frame);
// Unlike Expr (which passes nullptr), the instance data is handed to the
// compiled function as its first argument.
785 d->compiled.func_ptr(d, rwptrs.data(), strides.data(), props.data());
// Decide for each output property whether it is written as an integer or as
// a float; one branch consults the type the property already has on the
// first source frame.
788 enum class ResolvedPropWriteType : std::uint8_t {
Int,
Float };
789 std::vector<ResolvedPropWriteType> resolved_types;
790 resolved_types.reserve(d->output_props.size());
791 const VSMap* src_props = vsapi->getFramePropertiesRO(src_frames[0]);
793 for (
const auto& prop_info : d->output_props) {
794 const auto& prop_name = prop_info.first;
795 const auto prop_write_type = prop_info.second;
797 switch (prop_write_type) {
799 resolved_types.push_back(ResolvedPropWriteType::Int);
802 resolved_types.push_back(ResolvedPropWriteType::Float);
806 resolved_types.push_back(ResolvedPropWriteType::Float);
811 vsapi->mapGetType(src_props, prop_name.c_str());
812 if (existing_type == ptInt) {
813 resolved_types.push_back(ResolvedPropWriteType::Int);
814 }
else if (existing_type == ptFloat) {
815 resolved_types.push_back(ResolvedPropWriteType::Float);
818 resolved_types.push_back(ResolvedPropWriteType::Int);
820 resolved_types.push_back(ResolvedPropWriteType::Float);
// Write the values the expression left in the output-property slots of props.
827 VSMap* dst_props = vsapi->getFramePropertiesRW(dst_frame);
828 for (
size_t i = 0; i < d->output_props.size(); ++i) {
829 const auto& prop_name = d->output_props[i].first;
830 float value = props[1 + d->required_props.size() + i];
837 vsapi->mapDeleteKey(dst_props, prop_name.c_str());
// Integer properties are rounded to the nearest integer before being stored.
841 if (resolved_types[i] == ResolvedPropWriteType::Int) {
842 auto int_value =
static_cast<int64_t
>(lroundf(value));
843 vsapi->mapSetInt(dst_props, prop_name.c_str(), int_value,
846 vsapi->mapSetFloat(dst_props, prop_name.c_str(), value,
851 for (
const auto& frame : src_frames) {
852 vsapi->freeFrame(frame);
// Creation callback of the SingleExpr filter.
//
// Validates the clips (dimensions may differ between inputs), applies the
// optional output format, reads the single expression, runs the optimisation
// passes on its tokens, validates plane indices, records the frame properties
// the expression reads and stores, creates the analysis manager, reads the
// common options and registers the filter.
// Any std::exception raised during setup frees the acquired nodes and is
// reported through mapSetError with a "SingleExpr: " prefix.
862singleExprCreate(
const VSMap* in, VSMap* out, [[maybe_unused]]
void* userData,
863 VSCore* core,
const VSAPI* vsapi) {
865 auto d = std::make_unique<SingleExprData>();
869 validate_and_init_clips<false>(d.get(), in, vsapi);
870 parse_format_param(d.get(), in, vsapi, core);
872 d->mirror_boundary = vsapi->mapGetInt(in,
"boundary", 0, &err) != 0;
874 const char* expr_str = vsapi->mapGetData(in,
"expr", 0, &err);
876 throw std::runtime_error(
"An expression must be provided.");
879 bool use_infix = vsapi->mapGetInt(in,
"infix", 0, &err) != 0;
881 std::string processed_expr;
// Predefined macros describing the output and every input clip. Because the
// inputs may differ in size, per-input width/height/subsampling macros are
// provided as well.
883 std::map<std::string, std::string> macros;
884 macros[
"__SINGLEEXPR__"] =
"";
885 macros[
"__NUM_PLANES__"] = std::to_string(d->vi.format.numPlanes);
886 macros[
"__WIDTH__"] = std::to_string(d->vi.width);
887 macros[
"__HEIGHT__"] = std::to_string(d->vi.height);
888 macros[
"__INPUT_NUM__"] = std::to_string(d->num_inputs);
889 macros[
"__OUTPUT_BITDEPTH__"] =
890 std::to_string(d->vi.format.bitsPerSample);
891 macros[
"__OUTPUT_COLORFAMILY__"] =
892 std::to_string(d->vi.format.colorFamily);
893 macros[
"__SUBSAMPLE_W__"] =
894 std::to_string(d->vi.format.subSamplingW);
895 macros[
"__SUBSAMPLE_H__"] =
896 std::to_string(d->vi.format.subSamplingH);
897 macros[
"__OUTPUT_SAMPLETYPE__"] =
898 std::to_string((d->vi.format.sampleType == stFloat) ? 1 : 0);
900 for (
int i = 0; i < d->num_inputs; ++i) {
901 const VSVideoInfo* input_vi = vsapi->getVideoInfo(d->nodes[i]);
902 macros[std::format(
"__INPUT_BITDEPTH_{}__", i)] =
903 std::to_string(input_vi->format.bitsPerSample);
904 macros[std::format(
"__INPUT_COLORFAMILY_{}__", i)] =
905 std::to_string(input_vi->format.colorFamily);
906 macros[std::format(
"__INPUT_NUM_PLANES_{}__", i)] =
907 std::to_string(input_vi->format.numPlanes);
908 macros[std::format(
"__INPUT_SAMPLETYPE_{}__", i)] =
910 (input_vi->format.sampleType == stFloat) ? 1 : 0);
911 macros[std::format(
"__INPUT_WIDTH_{}__", i)] =
912 std::to_string(input_vi->width);
913 macros[std::format(
"__INPUT_HEIGHT_{}__", i)] =
914 std::to_string(input_vi->height);
915 macros[std::format(
"__INPUT_SUBSAMPLE_W_{}__", i)] =
916 std::to_string(input_vi->format.subSamplingW);
917 macros[std::format(
"__INPUT_SUBSAMPLE_H_{}__", i)] =
918 std::to_string(input_vi->format.subSamplingH);
924 processed_expr = expr_str;
// Two passes are run over the token stream before it is inspected.
934 static_opt_pass.
run(d->tokens, temp_am);
936 dyn_opt_pass.
run(d->tokens, temp_am);
// Inspect the tokens: check plane indices and register the frame properties
// the expression reads and writes.
939 for (
const auto& token : d->tokens) {
942 const auto& payload =
943 std::get<TokenPayloadPlaneDim>(token.payload);
944 if (payload.plane_idx < 0 ||
945 payload.plane_idx >= d->vi.format.numPlanes) {
946 throw std::runtime_error(
947 std::format(
"Invalid plane index {} in token '{}'",
948 payload.plane_idx, token.text));
952 const auto& payload =
953 std::get<TokenPayloadPropAccess>(token.payload);
954 auto key = std::make_pair(payload.clip_idx, payload.prop_name);
955 if (!d->prop_map.contains(key)) {
956 d->prop_map[key] =
static_cast<int>(
958 d->required_props.size());
959 d->required_props.push_back(key);
// Output properties are numbered in order of first appearance.
962 const auto& payload =
963 std::get<TokenPayloadPropStore>(token.payload);
964 if (!d->output_prop_map.contains(payload.prop_name)) {
965 d->output_prop_map[payload.prop_name] =
966 static_cast<int>(d->output_props.size());
967 d->output_props.emplace_back(payload.prop_name,
973 auto analyser = std::make_unique<analysis::AnalysisManager>(
974 d->tokens, d->mirror_boundary, 0);
977 d->analysis_manager = std::move(analyser);
979 parse_common_params(d.get(), in, vsapi);
981 }
catch (
const std::exception& e) {
// Setup failed: release the nodes acquired so far and report the error.
982 for (
auto* node : d->nodes) {
983 if (node !=
nullptr) {
984 vsapi->freeNode(node);
987 vsapi->mapSetError(out,
988 std::format(
"SingleExpr: {}", e.what()).c_str());
// Every input is declared as a strict-spatial dependency.
992 std::vector<VSFilterDependency> deps;
993 deps.reserve(d->nodes.size());
994 for (
auto* node : d->nodes) {
995 deps.push_back({node, rpStrictSpatial});
// Take the address before the call: d.release() appears in the same argument
// list, so &d->vi must not be evaluated there.
998 VSVideoInfo* vi_ptr = &d->vi;
1000 vsapi->createVideoFilter(out,
"SingleExpr", vi_ptr, singleExprGetFrame,
1001 singleExprFree, fmParallel, deps.data(),
1002 static_cast<int>(deps.size()), d.release(), core);
// Instance data of the VkExpr filter. The three-element arrays are indexed by
// plane number; each plane holds one entry per processing stage.
1005struct VkExprData : BaseExprData {
// Whether each plane is computed or copied.
1006 std::array<PlaneOp, 3> plane_op = {};
// Token stream of every stage of every plane.
1007 std::array<std::vector<std::vector<Token>>, 3> tokens_stages;
1008 std::array<std::vector<std::unique_ptr<analysis::AnalysisManager>>, 3>
// Value used until the "num_streams" argument overrides it.
1012 int num_streams = 8;
// Executor that processes planes; created at the end of vkExprCreate and
// reset first in vkExprFree.
1013 std::unique_ptr<vkexpr::VkExprExecutor> executor;
// Copied from the optional "dump_glsl" argument when it is supplied.
1015 std::string dump_glsl_path;
// Frame callback of the VkExpr filter.
//
// arInitial: requests frame n from every input node.
// arAllFramesReady: fetches the inputs, creates the output frame, reads the
// required frame properties and hands every plane marked PoProcess to the
// executor. A std::exception raised while processing frees all frames and is
// reported through setFilterError with a "VkExpr: GPU error: " prefix.
1021 vkExprGetFrame(
int n,
int activationReason,
void* instanceData,
1022 [[maybe_unused]]
void** frameData, VSFrameContext* frameCtx,
1023 VSCore* core,
const VSAPI* vsapi) {
1025 auto* d =
static_cast<VkExprData*
>(instanceData);
1027 if (activationReason == arInitial) {
1028 for (
int i = 0; i < d->num_inputs; ++i) {
1029 vsapi->requestFrameFilter(n, d->nodes[i], frameCtx);
1031 }
else if (activationReason == arAllFramesReady) {
1032 std::vector<const VSFrame*> src_frames(d->num_inputs);
1033 for (
int i = 0; i < d->num_inputs; ++i) {
1034 src_frames[i] = vsapi->getFrameFilter(n, d->nodes[i], frameCtx);
// Planes marked PoCopy name the first input as their plane source; processed
// planes pass nullptr.
1037 std::array<const VSFrame*, 3> plane_src = {
1038 d->plane_op.at(0) == PlaneOp::PoCopy ? src_frames[0] :
nullptr,
1039 d->plane_op.at(1) == PlaneOp::PoCopy ? src_frames[0] :
nullptr,
1040 d->plane_op.at(2) == PlaneOp::PoCopy ? src_frames[0] :
nullptr};
1041 std::array<int, 3> planes = {0, 1, 2};
1042 VSFrame* dst_frame = vsapi->newVideoFrame2(
1043 &d->vi.format, d->vi.width, d->vi.height, plane_src.data(),
1044 planes.data(), src_frames[0], core);
// props[0] is the frame number, followed by one slot per required property.
1046 std::vector<float> props(1 + d->required_props.size());
1047 read_frame_properties(props, src_frames, d->required_props, n, vsapi);
1049 for (
int plane = 0; plane < d->vi.format.numPlanes; ++plane) {
1050 if (d->plane_op.at(plane) != PlaneOp::PoProcess) {
1055 d->executor->processPlane(plane, n, src_frames, dst_frame,
1058 }
catch (
const std::exception& e) {
// Processing failed: release every frame and report the error on the context.
1059 for (
const auto& frame : src_frames) {
1060 vsapi->freeFrame(frame);
1062 vsapi->freeFrame(dst_frame);
1063 vsapi->setFilterError(
1064 std::format(
"VkExpr: GPU error: {}", e.what()).c_str(),
1070 for (
const auto& frame : src_frames) {
1071 vsapi->freeFrame(frame);
// Free callback of the VkExpr filter.
// The executor is destroyed explicitly first; the remaining instance data is
// then owned by d (destroyed at scope exit) and every input node is released.
1081vkExprFree(
void* instanceData, [[maybe_unused]] VSCore* core,
1082 const VSAPI* vsapi) {
1084 auto* raw =
static_cast<VkExprData*
>(instanceData);
1085 raw->executor.reset();
1086 std::unique_ptr<VkExprData> d(raw);
1087 for (
auto* node : d->nodes) {
1088 vsapi->freeNode(node);
// Creation callback of the VkExpr filter.
//
// Validates the clips (equal dimensions required), applies the optional
// output format, reads up to three per-plane expressions, splits each into
// stages, tokenizes every stage, records the frame properties accessed,
// generates GLSL for every stage of every processed plane (optionally dumping
// it to files), creates the executor and registers the filter.
// Any std::exception raised during setup frees the acquired nodes and is
// reported through mapSetError with a "VkExpr: " prefix.
1094vkExprCreate(
const VSMap* in, VSMap* out, [[maybe_unused]]
void* userData,
1095 VSCore* core,
const VSAPI* vsapi) {
1097 auto d = std::make_unique<VkExprData>();
1102 validate_and_init_clips<true>(d.get(), in, vsapi);
1104 parse_format_param(d.get(), in, vsapi, core);
1106 const int nexpr = vsapi->mapNumElements(in,
"expr");
1108 throw std::runtime_error(
1109 "At least one expression must be provided.");
// At most three expressions are read; planes without their own expression
// reuse the last one supplied.
1112 std::array<std::string, 3> expr_strs;
1113 for (
int i = 0; i < nexpr && i < 3; ++i) {
1114 expr_strs.at(i) = vsapi->mapGetData(in,
"expr", i, &err);
1116 for (
int i = nexpr; i < d->vi.format.numPlanes; ++i) {
1117 expr_strs.at(i) = expr_strs.at(nexpr - 1);
1120 d->mirror_boundary = vsapi->mapGetInt(in,
"boundary", 0, &err) != 0;
1122 const char* dump_glsl_path =
1123 vsapi->mapGetData(in,
"dump_glsl", 0, &err);
// Only store the path when the argument was actually supplied.
1124 if ((err == 0) && (dump_glsl_path !=
nullptr)) {
1125 d->dump_glsl_path = dump_glsl_path;
1128 bool use_infix = vsapi->mapGetInt(in,
"infix", 0, &err) != 0;
1130 std::array<std::vector<std::string>, 3> processed_stages;
1131 for (
int i = 0; i < nexpr && i < 3; ++i) {
1132 std::string raw_expr = expr_strs.at(i);
1133 std::vector<std::string> stages;
// Stages are separated by "##" in postfix expressions and "---" in infix ones.
1134 constexpr std::string_view POSTFIX_STAGE_SEPARATOR =
"##";
1135 constexpr std::string_view INFIX_STAGE_SEPARATOR =
"---";
1136 const std::string_view stage_separator =
1137 use_infix ? INFIX_STAGE_SEPARATOR : POSTFIX_STAGE_SEPARATOR;
// Cut the expression at every separator; the remainder is the final stage.
1139 while ((pos = raw_expr.find(stage_separator)) !=
1140 std::string::npos) {
1141 stages.push_back(raw_expr.substr(0, pos));
1142 raw_expr.erase(0, pos + stage_separator.size());
1144 stages.push_back(raw_expr);
// Predefined macros describing the output and every input clip.
1147 std::map<std::string, std::string> macros;
1148 macros[
"__GPU__"] =
"";
1149 macros[
"__EXPR__"] =
"";
1150 macros[
"__NUM_PLANES__"] =
1151 std::to_string(d->vi.format.numPlanes);
1152 macros[
"__WIDTH__"] = std::to_string(d->vi.width);
1153 macros[
"__HEIGHT__"] = std::to_string(d->vi.height);
1154 macros[
"__INPUT_NUM__"] = std::to_string(d->num_inputs);
1155 macros[
"__OUTPUT_BITDEPTH__"] =
1156 std::to_string(d->vi.format.bitsPerSample);
1157 macros[
"__OUTPUT_COLORFAMILY__"] =
1158 std::to_string(d->vi.format.colorFamily);
1159 macros[
"__SUBSAMPLE_W__"] =
1160 std::to_string(d->vi.format.subSamplingW);
1161 macros[
"__SUBSAMPLE_H__"] =
1162 std::to_string(d->vi.format.subSamplingH);
1163 macros[
"__PLANE_NO__"] = std::to_string(i);
1164 macros[
"__OUTPUT_SAMPLETYPE__"] = std::to_string(
1165 (d->vi.format.sampleType == stFloat) ? 1 : 0);
1167 for (
int j = 0; j < d->num_inputs; ++j) {
1168 const VSVideoInfo* input_vi =
1169 vsapi->getVideoInfo(d->nodes[j]);
1170 macros[std::format(
"__INPUT_BITDEPTH_{}__", j)] =
1171 std::to_string(input_vi->format.bitsPerSample);
1172 macros[std::format(
"__INPUT_COLORFAMILY_{}__", j)] =
1173 std::to_string(input_vi->format.colorFamily);
1174 macros[std::format(
"__INPUT_NUM_PLANES_{}__", j)] =
1175 std::to_string(input_vi->format.numPlanes);
1176 macros[std::format(
"__INPUT_SAMPLETYPE_{}__", j)] =
1178 (input_vi->format.sampleType == stFloat) ? 1 : 0);
1181 for (
size_t stage_idx = 0; stage_idx < stages.size();
1183 auto& stage = stages[stage_idx];
1184 if (!stage.empty()) {
1187 ¯os,
static_cast<int>(stage_idx));
1191 processed_stages.at(i) = stages;
// Planes without their own expression reuse the stages of the last one.
1193 for (
int i = nexpr; i < d->vi.format.numPlanes; ++i) {
1194 processed_stages.at(i) = processed_stages.at(nexpr - 1);
// A plane with no stages, or a single empty stage, is copied.
1197 for (
int i = 0; i < d->vi.format.numPlanes; ++i) {
1198 if (processed_stages.at(i).empty() ||
1199 (processed_stages.at(i).size() == 1 &&
1200 processed_stages.at(i)[0].empty())) {
1201 d->plane_op.at(i) = PlaneOp::PoCopy;
1203 d->plane_op.at(i) = PlaneOp::PoProcess;
1205 auto& plane_stages = processed_stages.at(i);
1206 d->tokens_stages.at(i).resize(plane_stages.size());
1207 d->analysis_managers.at(i).resize(plane_stages.size());
1209 for (
size_t s = 0; s < plane_stages.size(); ++s) {
1210 d->tokens_stages.at(i).at(s) =
1211 tokenize(plane_stages.at(s), d->num_inputs,
// Register every accessed frame property once. Indices start at 1 because
// slot 0 of the per-frame props array holds the frame number.
1214 for (
const auto& token : d->tokens_stages.at(i).at(s)) {
1217 const auto& payload =
1218 std::get<TokenPayloadPropAccess>(token.payload);
1219 auto key = std::make_pair(payload.clip_idx,
1221 if (!d->prop_map.contains(key)) {
1222 d->prop_map[key] =
static_cast<int>(
1223 1 + d->required_props.size());
1224 d->required_props.push_back(key);
1229 auto analyser = std::make_unique<analysis::AnalysisManager>(
1230 d->tokens_stages.at(i).at(s), d->mirror_boundary);
1233 d->analysis_managers.at(i).at(s) = std::move(analyser);
1239 static_cast<int>(vsapi->mapGetInt(in,
"num_streams", 0, &err));
1240 if (err != 0 || d->num_streams < 1) {
1246 static_cast<int>(vsapi->mapGetInt(in,
"device_id", 0, &err));
1250 if (d->device_id < -1) {
1251 throw std::runtime_error(
"device_id must be >= -1");
// Number of floats in the per-frame props array: frame number plus one slot
// per required property.
1254 auto num_props_floats =
1255 static_cast<uint32_t
>(1 + d->required_props.size());
// Generate GLSL for every stage of every processed plane.
1257 std::array<std::vector<std::string>, 3> glsl_stages;
1258 for (
int i = 0; i < d->vi.format.numPlanes; ++i) {
1259 if (d->plane_op.at(i) != PlaneOp::PoProcess) {
1263 auto num_stages = d->tokens_stages.at(i).size();
1264 glsl_stages.at(i).resize(num_stages);
1266 for (
size_t s = 0; s < num_stages; ++s) {
1268 *d->analysis_managers.at(i).at(s));
// NOTE(review): VapourSynth documents subSamplingW/subSamplingH as log2
// shift counts. Dividing by them gives the full size for 4:2:0 chroma planes
// and divides by zero for planes 1 and 2 of a 4:4:4 format; a right shift
// looks intended — confirm.
1270 d->tokens_stages.at(i).at(s), d->num_inputs,
1271 static_cast<int>(s),
1272 d->vi.width / (i == 0 ? 1 : d->vi.format.subSamplingW),
1273 d->vi.height / (i == 0 ? 1 : d->vi.format.subSamplingH),
1274 d->mirror_boundary, d->prop_map, analysis_results);
1276 glsl_stages.at(i).at(s) = generator.
generate();
// Optional dump: ".plane<i>.stage<s>" is inserted before the last '.' of the
// requested path, or appended when the path contains no '.'.
1278 if (!d->dump_glsl_path.empty()) {
1279 std::string plane_specific_path = d->dump_glsl_path;
1280 size_t dot_pos = plane_specific_path.rfind(
'.');
1281 std::string suffix = std::format(
".plane{}.stage{}", i, s);
1282 if (dot_pos != std::string::npos) {
1283 plane_specific_path.insert(dot_pos, suffix);
1285 plane_specific_path += suffix;
// The dump is written only if the file could be opened.
1288 std::ofstream glsl_file(plane_specific_path);
1289 if (glsl_file.is_open()) {
1290 glsl_file << glsl_stages.at(i).at(s);
1297 d->executor = std::make_unique<vkexpr::VkExprExecutor>(
1298 d->device_id, d->num_streams, d->num_inputs, std::move(glsl_stages),
1301 }
catch (
const std::exception& e) {
// Setup failed: release the nodes acquired so far and report the error.
1302 for (
auto* node : d->nodes) {
1303 if (node !=
nullptr) {
1304 vsapi->freeNode(node);
1307 vsapi->mapSetError(out, std::format(
"VkExpr: {}", e.what()).c_str());
// Every input is declared as a strict-spatial dependency.
1311 std::vector<VSFilterDependency> deps;
1312 deps.reserve(d->nodes.size());
1313 for (
auto* node : d->nodes) {
1314 deps.push_back({node, rpStrictSpatial});
// Take the address before the call: d.release() appears in the same argument
// list, so &d->vi must not be evaluated there.
1317 VSVideoInfo* vi_ptr = &d->vi;
1319 vsapi->createVideoFilter(out,
"VkExpr", vi_ptr, vkExprGetFrame, vkExprFree,
1320 fmParallel, deps.data(),
1321 static_cast<int>(deps.size()), d.release(), core);
// Body of the buffer-allocation helper (its signature line is not part of
// this excerpt). Looks up the named array in the thread-local frame data,
// creating an empty entry if none exists, grows its buffer when the requested
// size exceeds the current size (it never shrinks here) and returns a pointer
// to the buffer's storage.
// NOTE(review): a negative requested_size converts to a very large size_t in
// the comparison below and would then be passed to resize() — confirm callers
// never pass one.
1332 auto& array = g_frame_data.dynamic_arrays[std::string(name)];
1333 if (
static_cast<size_t>(requested_size) > array.buffer.size()) {
1334 array.buffer.resize(requested_size);
1336 return array.buffer.data();
// Body of the buffer-size query helper (its signature line is not part of
// this excerpt). Searches the thread-local frame data for the named array
// without creating it; when found, the result is the element count of its
// buffer. The value returned for an unknown name is on a line not shown here.
1340 auto it = g_frame_data.dynamic_arrays.find(std::string(name));
1341 return (it != g_frame_data.dynamic_arrays.end())
1342 ?
static_cast<int64_t
>(it->second.buffer.size())
// Plugin entry point: configures the plugin (identifier
// "com.yuygfgg.llvmexpr", namespace "llvmexpr", version 4.4) and registers
// its filter functions together with their argument signature strings.
1349VS_EXTERNAL_API(
void)
1352 vspapi->configPlugin(
1353 "com.yuygfgg.llvmexpr",
"llvmexpr",
"LLVM JIT RPN Expression Filter",
1354 VS_MAKE_VERSION(4, 4), VAPOURSYNTH_API_VERSION, 0, plugin);
// Per-plane expression filter; the only one that accepts tile_x / tile_y.
1355 vspapi->registerFunction(
1357 "clips:vnode[];expr:data[];format:int:opt;boundary:int:opt;"
1358 "dump_ir:data:opt;opt_level:int:opt;approx_math:int:opt;infix:int:opt;"
1359 "tile_x:int:opt;tile_y:int:opt;",
1360 "clip:vnode;", exprCreate,
nullptr, plugin);
// Whole-frame filter taking exactly one expression ("expr:data").
1361 vspapi->registerFunction(
"SingleExpr",
1362 "clips:vnode[];expr:data;format:int:opt;boundary:"
1363 "int:opt;dump_ir:data:opt;opt_"
1364 "level:int:opt;approx_math:int:opt;infix:int:opt;",
1365 "clip:vnode;", singleExprCreate,
nullptr, plugin);
// Vulkan-backed filter; takes num_streams, device_id and dump_glsl instead of
// the LLVM-specific options.
1367 vspapi->registerFunction(
"VkExpr",
1368 "clips:vnode[];expr:data[];format:int:opt;"
1369 "boundary:int:opt;num_streams:int:opt;device_id:"
1370 "int:opt;dump_glsl:data:opt;infix:int:opt;",
1371 "clip:vnode;", vkExprCreate,
nullptr, plugin);
std::string convert_infix_to_postfix(const std::string &infix_expr, int num_inputs, infix2postfix::Mode mode, const std::map< std::string, std::string > *predefined_macros, int num_intermediate_inputs)
std::unordered_map< std::string, CompiledFunction > jit_cache
std::vector< Token > tokenize(const std::string &expr, int num_inputs, ExprMode mode, int num_intermediate_inputs)
CompiledFunction compile()
PreservedAnalyses run(std::vector< Token > &tokens, AnalysisManager &am) override
PreservedAnalyses run(std::vector< Token > &tokens, AnalysisManager &am) override
float * llvmexpr_ensure_buffer(const char *name, int64_t requested_size)
constexpr uint32_t PROP_WRITE_NAN_PAYLOAD
constexpr uint32_t PROP_READ_NAN_PAYLOAD
constexpr uint32_t PROP_DELETE_NAN_PAYLOAD
int64_t llvmexpr_get_buffer_size(const char *name)
VapourSynthPluginInit2(VSPlugin *plugin, const VSPLUGINAPI *vspapi)