add src

dnjulek · Nov 14, 2023 · b5d4f7e · b5d4f7e
1 parent 053c813
commit b5d4f7e
Show file tree

Hide file tree

Showing 12 changed files with 948 additions and 0 deletions.
diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml
@@ -0,0 +1,36 @@
+# This is a basic workflow to help you get started with Actions
+
+name: Build (Linux)
+
+# Controls when the workflow will run
+on:
+ # Triggers the workflow on push or pull request events but only for the "main" branch
+ push:
+ branches: [ "main" ]
+ pull_request:
+ branches: [ "main" ]
+
+ # Allows you to run this workflow manually from the Actions tab
+ workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+ # This workflow contains a single job called "build"
+ build:
+ # The type of runner that the job will run on
+ runs-on: ubuntu-latest
+
+ # Steps represent a sequence of tasks that will be executed as part of the job
+ steps:
+ # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - uses: actions/checkout@v3
+
+ # Runs a single command using the runner's shell
+ - name: install zig
+ run: sudo snap install zig --classic --edge
+
+ # Runs a set of commands using the runner's shell
+ - name: build
+ run: |
+ zig build -Doptimize=ReleaseFast
+ ls zig-out/lib
diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml
@@ -0,0 +1,37 @@
+# This is a basic workflow to help you get started with Actions
+
+name: Build (Windows)
+
+# Controls when the workflow will run
+on:
+ # Triggers the workflow on push or pull request events but only for the "main" branch
+ push:
+ branches: [ "main" ]
+ pull_request:
+ branches: [ "main" ]
+
+ # Allows you to run this workflow manually from the Actions tab
+ workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+ # This workflow contains a single job called "build"
+ build:
+ # The type of runner that the job will run on
+ runs-on: windows-latest
+
+ # Steps represent a sequence of tasks that will be executed as part of the job
+ steps:
+ # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - uses: actions/checkout@v3
+
+ # Installs Zig using the setup-zig action
+ - name: install zig
+ uses: goto-bus-stop/setup-zig@v2.1.1
+ with:
+ cache: false
+
+ - name: build
+ run: |
+ zig build -Doptimize=ReleaseFast
+ ls zig-out\lib
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+zig-cache/
+zig-out/
+.vscode/
+run.bat
diff --git a/README.md b/README.md
@@ -0,0 +1,36 @@
+# vapoursynth-ssimulacra2
+[![Linux](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/linux-build.yml/badge.svg)](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/linux-build.yml)
+[![Windows](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/windows-build.yml/badge.svg)](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/windows-build.yml)
+
+[SSIMULACRA2](https://github.com/cloudinary/ssimulacra2) for VapourSynth with Zig.
+
+This implementation does not produce exactly the same results as the original, because it uses a different gaussian blur algorithm:\
+the original uses a recursive gaussian blur, while this one uses a "true" gaussian blur that performs better.\
+This makes it up to 70% faster.
+
+If you want to use the original algorithm with VapourSynth, [see here](https://github.com/dnjulek/vapoursynth-julek-plugin/wiki/SSIMULACRA).
+
+## Usage
+```python
+ssimulacra2.SSIMULACRA2(vnode reference, vnode distorted)
+```
+
+```python
+ref = YUV420P8 clip
+dist = YUV420P8 clip
+
+# Only works with RGBS format.
+ref = ref.resize.Bicubic(format=vs.RGBS, matrix_in=1)
+dist = dist.resize.Bicubic(format=vs.RGBS, matrix_in=1)
+
+# Must be converted from gamma to linear with fmtc because resize/zimg uses another formula.
+ref = ref.fmtc.transfer(transs="srgb", transd="linear", bits=32)
+dist = dist.fmtc.transfer(transs="srgb", transd="linear", bits=32)
+
+ssim = core.ssimulacra2.SSIMULACRA2(ref, dist)
+```
+
+## Building
+Zig ver >= 0.12.0-dev.1594
+
+``zig build -Doptimize=ReleaseFast``
diff --git a/build.zig b/build.zig
@@ -0,0 +1,27 @@
+const std = @import("std");
+
+/// Build script entry point: declares the `ssimulacra2` shared library,
+/// wires in the `vapoursynth` module from the package dependency, links libc
+/// and installs the resulting artifact.
+pub fn build(b: *std.Build) void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Resolve the dependency with the same target/optimize settings as the library.
+    const vapoursynth_dep = b.dependency("vapoursynth", .{ .target = target, .optimize = optimize });
+    const vapoursynth_module = vapoursynth_dep.module("vapoursynth");
+
+    const lib = b.addSharedLibrary(.{
+        .name = "ssimulacra2",
+        .root_source_file = .{ .path = "src/ssimulacra2.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+    lib.linkLibC();
+    lib.addModule("vapoursynth", vapoursynth_module);
+
+    // Symbols are stripped only for ReleaseFast; other modes keep the default.
+    switch (lib.optimize) {
+        .ReleaseFast => lib.strip = true,
+        else => {},
+    }
+
+    b.installArtifact(lib);
+}
diff --git a/build.zig.zon b/build.zig.zon
@@ -0,0 +1,11 @@
+// Package manifest: declares the package name, version, included paths and
+// the dependencies fetched for the build.
+.{
+ .name = "SSIMULACRA2",
+ .version = "1.0.0",
+ .paths = .{""},
+ .dependencies = .{
+ // vapoursynth-zig, fetched as an archive of one specific commit.
+ .vapoursynth = .{
+ .url = "https://github.com/dnjulek/vapoursynth-zig/archive/11809b4e8047c15fa5de10a3c3ae15d1546630ea.tar.gz",
+ // NOTE(review): presumably the hash the downloaded archive is checked against; confirm before changing the URL.
+ .hash = "12208a2b305e3a3cfb509f56320d839d1a7706946ca2baa099088361a71e899461d6",
+ },
+ },
+}
diff --git a/src/blur.zig b/src/blur.zig
@@ -0,0 +1,111 @@
+const std = @import("std");
+const allocator = std.heap.c_allocator;
+
+/// Computes the blurred value of column `j` for columns whose 9-tap window
+/// reaches past either end of the line. `width` must be at least 1 and
+/// `j < width`.
+///
+/// Taps left of the centre read `j - reach`, or `min(reach - j, width - 1)`
+/// when that would fall before column 0. Taps right of the centre read
+/// `j + reach`, or `j - min(overshoot, j)` when that would fall past the last
+/// column. Every offset is formed before it is applied to `j`, so no
+/// intermediate `usize` subtraction can underflow.
+inline fn blur_h_edge(srcp: anytype, kernel: [9]f32, j: usize, width: usize) f32 {
+    const ksize: usize = 9;
+    const radius: usize = ksize >> 1;
+    const dist_from_right: usize = width - 1 - j;
+
+    var accum: f32 = 0.0;
+    var k: usize = 0;
+    while (k < ksize) : (k += 1) {
+        const idx: usize = if (k < radius) left: {
+            const reach: usize = radius - k;
+            break :left if (j < reach) @min(reach - j, width - 1) else j - reach;
+        } else right: {
+            const reach: usize = k - radius;
+            break :right if (dist_from_right < reach)
+                j - @min(reach - dist_from_right, j)
+            else
+                j + reach;
+        };
+        accum += kernel[k] * srcp[idx];
+    }
+    return accum;
+}
+
+/// Convolves one line of `width` samples read from `srcp` with the 9-tap
+/// `kernel` and writes the result to `dstp[0..width]`.
+///
+/// The line is handled in three runs: the leading columns whose window
+/// crosses the left end, the interior columns where all nine taps are in
+/// range, and the trailing columns whose window crosses the right end.
+/// Lines shorter than the window are handled entirely by the edge path.
+inline fn blur_h(srcp: anytype, dstp: [*]f32, kernel: [9]f32, width: usize) void {
+    const ksize: usize = 9;
+    const radius: usize = ksize >> 1;
+    // First column (exclusive end of the interior) whose window crosses the right end.
+    const interior_end: usize = width - @min(width, radius);
+
+    var j: usize = 0;
+    while (j < @min(width, radius)) : (j += 1) {
+        dstp[j] = blur_h_edge(srcp, kernel, j, width);
+    }
+
+    j = radius;
+    while (j < interior_end) : (j += 1) {
+        var accum: f32 = 0.0;
+        var k: usize = 0;
+        while (k < ksize) : (k += 1) {
+            // j >= radius here, so adding first keeps the index arithmetic in range.
+            accum += kernel[k] * srcp[j + k - radius];
+        }
+
+        dstp[j] = accum;
+    }
+
+    // Starts at `radius` at the earliest so columns already written by the
+    // first run are not processed twice on short lines.
+    j = @max(radius, interior_end);
+    while (j < width) : (j += 1) {
+        dstp[j] = blur_h_edge(srcp, kernel, j, width);
+    }
+}
+
+/// Combines nine source rows into one output line: for every column below
+/// `width`, sums `kernel[tap] * src[tap][column]` over the nine taps (in tap
+/// order) and stores the total in `dstp`.
+inline fn blur_v(src: anytype, dstp: [*]f32, kernel: [9]f32, width: usize) void {
+    for (0..width) |col| {
+        var total: f32 = 0.0;
+        for (kernel, 0..) |weight, tap| {
+            total += weight * src[tap][col];
+        }
+        dstp[col] = total;
+    }
+}
+
+/// Blurs a `width` x `height` plane of f32 samples from `src` into `dst` with
+/// a fixed, symmetric 9-tap kernel. Both planes use `stride` samples per row.
+///
+/// For each output row the nine contributing source rows are selected (rows
+/// that would fall above the first or below the last row are folded back
+/// inside the plane), combined vertically into a scratch line, and that line
+/// is then filtered horizontally into `dst`.
+///
+/// The scratch line is allocated once per call from the file's allocator and
+/// released on return. Allocation failure panics, because the `void` return
+/// type leaves no way to report it.
+pub inline fn process(src: [*]const f32, dst: [*]f32, stride: usize, width: usize, height: usize) void {
+    const kernel = [9]f32{
+        0.0076144188642501831054687500,
+        0.0360749699175357818603515625,
+        0.1095860823988914489746093750,
+        0.2134445458650588989257812500,
+        0.2665599882602691650390625000,
+        0.2134445458650588989257812500,
+        0.1095860823988914489746093750,
+        0.0360749699175357818603515625,
+        0.0076144188642501831054687500,
+    };
+
+    const ksize: usize = 9;
+    const radius: usize = ksize >> 1;
+
+    // One scratch line is enough: it is fully rewritten for every row.
+    const tmp_arr = allocator.alignedAlloc(f32, 32, width) catch @panic("blur: out of memory");
+    defer allocator.free(tmp_arr);
+    const tmp: [*]f32 = tmp_arr.ptr;
+
+    var i: usize = 0;
+    while (i < height) : (i += 1) {
+        var srcp: [9][*]const f32 = undefined;
+        const dstp: [*]f32 = dst + i * stride;
+        const dist_from_bottom: usize = height - 1 - i;
+
+        // Rows above the centre. The offset is formed first so that
+        // `i - reach` is only evaluated when it cannot underflow.
+        var k: usize = 0;
+        while (k < radius) : (k += 1) {
+            const reach: usize = radius - k;
+            const row: usize = if (i < reach) @min(reach - i, height - 1) else i - reach;
+            srcp[k] = src + row * stride;
+        }
+
+        // Centre row and rows below it.
+        while (k < ksize) : (k += 1) {
+            const reach: usize = k - radius;
+            const row: usize = if (dist_from_bottom < reach)
+                i - @min(reach - dist_from_bottom, i)
+            else
+                i + reach;
+            srcp[k] = src + row * stride;
+        }
+
+        blur_v(srcp, tmp, kernel, width);
+        blur_h(tmp, dstp, kernel, width);
+    }
+}
diff --git a/src/downscale.zig b/src/downscale.zig
@@ -0,0 +1,32 @@
+/// Halves the resolution of three planes by averaging 2x2 blocks of samples.
+///
+/// `src` holds three `in_w` x `in_h` planes with `src_stride` samples per
+/// row. Each `dst` plane receives `ceil(in_w / 2)` x `ceil(in_h / 2)` samples
+/// with a row stride of `ceil(src_stride / 2)`. When a block extends past the
+/// right or bottom edge, the coordinate is clamped to the last column/row, so
+/// edge samples are counted more than once.
+pub inline fn process(src: [3][*]f32, dst: [3][*]f32, src_stride: usize, in_w: usize, in_h: usize) void {
+    const fscale: f32 = 2.0;
+    const uscale: usize = 2;
+    const out_w = (in_w + uscale - 1) / uscale;
+    const out_h = (in_h + uscale - 1) / uscale;
+    const dst_stride = (src_stride + uscale - 1) / uscale;
+    const normalize: f32 = 1.0 / (fscale * fscale);
+
+    var plane: usize = 0;
+    while (plane < 3) : (plane += 1) {
+        const srcp = src[plane];
+        var dstp = dst[plane];
+        var oy: usize = 0;
+        while (oy < out_h) : (oy += 1) {
+            var ox: usize = 0;
+            while (ox < out_w) : (ox += 1) {
+                var sum: f32 = 0.0;
+                var iy: usize = 0;
+                while (iy < uscale) : (iy += 1) {
+                    // The source row depends only on (oy, iy), so clamp it once per row.
+                    const y: usize = @min(oy * uscale + iy, in_h - 1);
+                    const row = srcp + y * src_stride;
+                    var ix: usize = 0;
+                    while (ix < uscale) : (ix += 1) {
+                        const x: usize = @min(ox * uscale + ix, in_w - 1);
+                        sum += row[x];
+                    }
+                }
+                dstp[ox] = sum * normalize;
+            }
+            dstp += dst_stride;
+        }
+    }
+}
diff --git a/src/multiply.zig b/src/multiply.zig
@@ -0,0 +1,19 @@
+/// Vector type used to multiply 16 f32 samples at a time.
+const vec_t: type = @Vector(16, f32);
+
+/// Multiplies the first 16 samples of `src1` and `src2` element-wise and
+/// stores the products in the first 16 samples of `dst`.
+inline fn process_vec(src1: anytype, src2: anytype, dst: []f32) void {
+    dst[0..16].* = @as(vec_t, src1[0..16].*) * @as(vec_t, src2[0..16].*);
+}
+
+/// Writes the element-wise product of two `width` x `height` planes to `dst`.
+/// All three planes use `stride` samples per row.
+///
+/// Each row is processed in whole 16-sample vectors, and the remaining
+/// `width % 16` samples are multiplied one by one. Only the first `width`
+/// samples of every row are read or written, so the function does not depend
+/// on rows being padded to a multiple of 16 samples.
+pub inline fn process(src1: [*]const f32, src2: [*]const f32, dst: [*]f32, stride: usize, width: usize, height: usize) void {
+    const lanes: usize = 16;
+    // End of the part of each row that is covered by whole vectors.
+    const vec_end: usize = width - width % lanes;
+
+    var y: usize = 0;
+    while (y < height) : (y += 1) {
+        const srcp1 = src1 + y * stride;
+        const srcp2 = src2 + y * stride;
+        const dstp = dst + y * stride;
+
+        var x: usize = 0;
+        while (x < vec_end) : (x += lanes) {
+            const x2: usize = x + lanes;
+            process_vec(srcp1[x..x2], srcp2[x..x2], dstp[x..x2]);
+        }
+
+        // Scalar tail for widths that are not a multiple of the vector size.
+        while (x < width) : (x += 1) {
+            dstp[x] = srcp1[x] * srcp2[x];
+        }
+    }
+}