README.md, expr2/exprfilter.cpp: add dynamic pixel access support

For example, `x[2,-1]` can be rewritten as `X 2 + Y 1 - x[]` to trade performance for flexibility. Use the `x[]` form only as a last resort as it requires gather load instructions to be efficient (i.e. avx2 on x86). Of course, because we use LLVM, avx2 is not required. Signed-off-by: akarin <i@akarin.info>
AkarinVS · Jul 11, 2022 · 35b7fb6 · 35b7fb6
1 parent de866cb
commit 35b7fb6
Show file tree

Hide file tree

Showing 2 changed files with 47 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -75,6 +75,7 @@ This works just like [`std.Expr`](http://www.vapoursynth.com/doc/functions/expr.
  - The `boundary` argument specifies the default boundary condition for all relative pixel accesses without explicit specification:
  - 0 means clamped
  - 1 means mirrored
+- (\*) Dynamic pixel access using absolute coordinates. Use `absX absY x[]` to access the pixel (absX, absY) in the current frame of clip x. absX and absY can be computed using arbitrary expressions, and they are clamped to be within their respective ranges (i.e. boundary pixels are repeated indefinitely). Only use this as a last resort, as the performance is likely worse than static relative pixel access, depending on the access pattern.
 - Support more bases for constants
  - hexadecimals: 0x123 or 0x123.4p5
  - octals: 023 (however, invalid octal numbers will be parsed as floating-point numbers, so "09" will be parsed the same as "9.0")
@@ -96,6 +97,7 @@ Use this function to query the version and features of the plugin. It will retur
  b'x[x,y]:m' # relative pixel access with mirrored boundary condition
  b'drop', # dropN support
  b'sort', # sortN support
+ b'x[]', # dynamic pixel access
 ]
 ```
 

diff --git a/expr2/exprfilter.cpp b/expr2/exprfilter.cpp
@@ -51,7 +51,8 @@ namespace {
 
 enum class ExprOpType {
  // Terminals.
- MEM_LOAD, CONSTANTI, CONSTANTF, CONST_LOAD,
+ MEM_LOAD, MEM_LOAD_VAR,
+ CONSTANTI, CONSTANTF, CONST_LOAD,
  VAR_LOAD, VAR_STORE,
 
  // Arithmetic primitives.
@@ -79,13 +80,14 @@ enum class ExprOpType {
 std::vector<std::string> features = {
  "x.property",
  "sin", "cos",
- "%", "clip", "clamp",
+ "%", "clip", "clamp", "**",
  "N", "X", "Y", "pi", "width", "height",
  "trunc", "round", "floor",
  "var@", "var!",
  "x[x,y]", "x[x,y]:m",
  "drop",
  "sort",
+ "x[]",
 };
 
 enum class ComparisonType {
@@ -237,6 +239,7 @@ ExprOp decodeToken(const std::string &token)
  {"height",{ ExprOpType::CONST_LOAD, static_cast<int>(LoadConstType::Height) } },
  };
  static const std::regex relpixelRe { "^([a-z])\\[(-?[0-9]+),(-?[0-9]+)\\](:[cm])?$" };
+ static const std::regex abspixelRe { "^([a-z])\\[\\]$" };
  std::smatch match;
 
  auto it = simple.find(token);
@@ -279,6 +282,10 @@ ExprOp decodeToken(const std::string &token)
  BoundaryCondition bc = flag.size() == 0 ? BoundaryCondition::Unspecified :
  (flag[1] == 'm' ? BoundaryCondition::Mirrored : BoundaryCondition::Clamped);
  return{ ExprOpType::MEM_LOAD, clip[0] >= 'x' ? clip[0] - 'x' : clip[0] - 'a' + 3, "", atoi(sx.c_str()), atoi(sy.c_str()), bc };
+ } else if (std::regex_match(token, match, abspixelRe)) {
+ ASSERT(match.size() == 2);
+ auto clip = match[1].str();
+ return{ ExprOpType::MEM_LOAD_VAR, clip[0] >= 'x' ? clip[0] - 'x' : clip[0] - 'a' + 3 };
  } else {
  size_t pos = 0;
  long long l = 0;
@@ -428,6 +435,7 @@ class Compiler {
  IntV i() { return std::get<IntV>(v); }
 
  FloatV ensureFloat() { return isFloat() ? f() : FloatV(i()); }
+ IntV ensureInt() { return isFloat() ? IntV(RoundInt(f())) : i(); }
 
  Value Max(Value &rhs) { return (isFloat() || rhs.isFloat()) ? Value(rr::Max(f(), rhs.f())) : Value(rr::Max(i(), rhs.i())); }
  Value Min(Value &rhs) { return (isFloat() || rhs.isFloat()) ? Value(rr::Min(f(), rhs.f())) : Value(rr::Min(i(), rhs.i())); }
@@ -688,6 +696,7 @@ void Compiler<lanes>::buildOneIter(const Helper &helpers, State &state)
 {
  constexpr unsigned char numOperands[] = {
  0, // MEM_LOAD
+ 2, // MEM_LOAD_VAR
  0, // CONSTANTI
  0, // CONSTANTF
  0, // CONST_LOAD
@@ -816,7 +825,7 @@ void Compiler<lanes>::buildOneIter(const Helper &helpers, State &state)
  if (regularLoad)
  v = IntV(*Pointer<IntV>(p, (unaligned ? 1:lanes)*sizeof(uint32_t)));
  else
- v = IntV(Gather(Pointer<Int>(p), offsets, IntV(~0), sizeof(uint16_t)));
+ v = IntV(Gather(Pointer<Int>(p), offsets, IntV(~0), sizeof(uint32_t)));
  }
  v = relativeAccessAdjust<lanes>(x, state.x, state.width, op, v);
  if (ctx.forceFloat())
@@ -920,6 +929,39 @@ void Compiler<lanes>::buildOneIter(const Helper &helpers, State &state)
  break; \
  }
 
+ case ExprOpType::MEM_LOAD_VAR: {
+ LOAD2(absx_, absy_);
+
+ const VSFormat *format = ctx.vi[op.imm.i]->format;
+ Pointer<Byte> p = state.wptrs[op.imm.i + 1];
+ IntV stride = state.strides[op.imm.i + 1], size = format->bytesPerSample;
+ IntV absx = Min(Max(absx_.ensureInt(), IntV(0)), IntV(state.width-1));
+ IntV absy = Min(Max(absy_.ensureInt(), IntV(0)), IntV(state.height-1));
+ IntV offsets = absy * stride + absx * size;
+
+ if (format->sampleType == stInteger) {
+ IntV v;
+ if (format->bytesPerSample == 1)
+ v = IntV(Gather(Pointer<Byte>(p), offsets, IntV(~0), sizeof(uint8_t)));
+ else if (format->bytesPerSample == 2)
+ v = IntV(Gather(Pointer<UShort>(p), offsets, IntV(~0), sizeof(uint16_t)));
+ else if (format->bytesPerSample == 4)
+ v = IntV(Gather(Pointer<Int>(p), offsets, IntV(~0), sizeof(uint32_t)));
+ if (ctx.forceFloat())
+ OUT(FloatV(v));
+ else
+ OUT(v);
+ } else if (format->sampleType == stFloat) {
+ FloatV v;
+ if (format->bytesPerSample == 2)
+ abort(); // XXX: f16 not supported
+ else if (format->bytesPerSample == 4)
+ v = Gather(Pointer<Float>(p), offsets, IntV(~0), sizeof(float));
+ OUT(v);
+ }
+ break;
+ }
+
  case ExprOpType::VAR_LOAD:
  OUT(state.variables[op.imm.i]);
  break;