Skip to content

Commit

Permalink
add src
Browse files Browse the repository at this point in the history
  • Loading branch information
dnjulek committed Nov 14, 2023
1 parent 053c813 commit b5d4f7e
Show file tree
Hide file tree
Showing 12 changed files with 948 additions and 0 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# This is a basic workflow to help you get started with Actions

name: Build (Linux)

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the "main" branch
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v3

# Runs a single command using the runners shell
- name: install zig
run: sudo snap install zig --classic --edge

# Runs a set of commands using the runners shell
- name: build
run: |
zig build -Doptimize=ReleaseFast
ls zig-out/lib
37 changes: 37 additions & 0 deletions .github/workflows/windows-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This is a basic workflow to help you get started with Actions

name: Build (Windows)

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the "main" branch
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: windows-latest

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v3

# Installs Zig using the setup-zig action
- name: install zig
uses: goto-bus-stop/setup-zig@v2.1.1
with:
cache: false

- name: build
run: |
zig build -Doptimize=ReleaseFast
ls zig-out\lib
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
zig-cache/
zig-out/
.vscode/
run.bat
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# vapoursynth-ssimulacra2
[![Linux](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/linux-build.yml/badge.svg)](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/linux-build.yml)
[![Windows](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/windows-build.yml/badge.svg)](https://github.com/dnjulek/vapoursynth-ssimulacra2/actions/workflows/windows-build.yml)

[SSIMULACRA2](https://github.com/cloudinary/ssimulacra2) for VapourSynth with Zig.

This implementation does not produce exactly the same results as the original because it uses a different Gaussian blur algorithm:\
the original uses a recursive Gaussian blur, whereas this one uses a "true" Gaussian blur that performs better.\
This makes it up to 70% faster.

If you want to use the original algorithm with VapourSynth, [see here](https://github.com/dnjulek/vapoursynth-julek-plugin/wiki/SSIMULACRA).

## Usage
```python
ssimulacra2.SSIMULACRA2(vnode reference, vnode distorted)
```

```python
ref = YUV420P8 clip
dist = YUV420P8 clip

# Only works with RGBS format.
ref = ref.resize.Bicubic(format=vs.RGBS, matrix_in=1)
dist = dist.resize.Bicubic(format=vs.RGBS, matrix_in=1)

# Must be converted from gamma to linear with fmtc because resize/zimg uses another formula.
ref = ref.fmtc.transfer(transs="srgb", transd="linear", bits=32)
dist = dist.fmtc.transfer(transs="srgb", transd="linear", bits=32)

ssim = core.ssimulacra2.SSIMULACRA2(ref, dist)
```

## Building
Zig ver >= 0.12.0-dev.1594

``zig build -Doptimize=ReleaseFast``
27 changes: 27 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const std = @import("std");

/// Declares the build graph: a shared library named "ssimulacra2" built from
/// `src/ssimulacra2.zig`, with the "vapoursynth" package module attached and
/// libc linked, installed as the default artifact.
pub fn build(b: *std.Build) void {
    // Target and optimization mode come from the standard command-line options
    // (for example `-Doptimize=ReleaseFast`).
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const plugin = b.addSharedLibrary(.{
        .name = "ssimulacra2",
        .root_source_file = .{ .path = "src/ssimulacra2.zig" },
        .target = target,
        .optimize = optimize,
    });

    // The dependency is resolved by name from the package manifest and built
    // with the same target and optimization mode as the plugin itself.
    const vs_package = b.dependency("vapoursynth", .{
        .target = target,
        .optimize = optimize,
    });
    const vs_module = vs_package.module("vapoursynth");

    plugin.addModule("vapoursynth", vs_module);
    plugin.linkLibC();

    // Strip the binary only for ReleaseFast builds; every other mode keeps
    // the default setting.
    if (plugin.optimize == .ReleaseFast) plugin.strip = true;

    b.installArtifact(plugin);
}
11 changes: 11 additions & 0 deletions build.zig.zon
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.{
.name = "SSIMULACRA2",
.version = "1.0.0",
.paths = .{""},
.dependencies = .{
.vapoursynth = .{
.url = "https://github.com/dnjulek/vapoursynth-zig/archive/11809b4e8047c15fa5de10a3c3ae15d1546630ea.tar.gz",
.hash = "12208a2b305e3a3cfb509f56320d839d1a7706946ca2baa099088361a71e899461d6",
},
},
}
111 changes: 111 additions & 0 deletions src/blur.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
const std = @import("std");
const allocator = std.heap.c_allocator;

/// Convolves one output column `j` whose 9-tap window may hang over either end
/// of the row, remapping out-of-range taps back into `0..width`.
///
/// Taps that fall before index 0 are reflected about the first sample (target
/// index `t < 0` reads `-t`), clamped to the last valid index.
///
/// NOTE(review): taps that fall past the last sample resolve to
/// `j - overshoot`, which (before clamping) is the same column
/// `width - 1 - (k - radius)` for every `j`, rather than
/// `(width - 1) - overshoot` as a mirror image of the left-edge rule would
/// give. The mapping is kept exactly as it was so results do not change;
/// confirm whether this asymmetry is intended.
///
/// Requires `j < width`.
inline fn blurEdgeColumn(srcp: anytype, kernel: [9]f32, j: usize, width: usize) f32 {
    const radius: usize = kernel.len >> 1;
    const dist_from_right: usize = width - 1 - j;

    var accum: f32 = 0.0;
    for (kernel, 0..) |coeff, k| {
        const idx: usize = if (k < radius) left: {
            // Tap lies `back` samples before the centre.
            const back = radius - k;
            break :left if (j < back) @min(back - j, width - 1) else j - back;
        } else right: {
            // Tap lies `ahead` samples after the centre. The sum is formed as
            // `j + ahead` so no unsigned intermediate can drop below zero.
            const ahead = k - radius;
            break :right if (dist_from_right < ahead)
                j - @min(ahead - dist_from_right, j)
            else
                j + ahead;
        };
        accum += coeff * srcp[idx];
    }
    return accum;
}

/// Horizontal pass of the 9-tap blur: writes `width` filtered samples to
/// `dstp`, reading `width` samples from `srcp`.
///
/// `srcp` may be any type indexable with a `usize` that yields `f32`.
/// Columns are produced in ascending order while `srcp` is still being read,
/// so `srcp` and `dstp` presumably must not alias (verify against callers).
/// A `width` of 0 is a no-op.
inline fn blur_h(srcp: anytype, dstp: [*]f32, kernel: [9]f32, width: usize) void {
    const ksize: usize = 9;
    const radius: usize = ksize >> 1;

    // Columns closer than `radius` to either end need edge handling. When the
    // row is not wider than `radius`, the first loop covers every column and
    // the two remaining loops are empty.
    const left_end = @min(width, radius);
    const right_start = @max(radius, width - left_end);

    var j: usize = 0;
    while (j < left_end) : (j += 1) {
        dstp[j] = blurEdgeColumn(srcp, kernel, j, width);
    }

    // Interior columns: the whole window is in range, so no remapping is
    // needed. `j >= radius` here, so `j - radius` cannot underflow.
    j = radius;
    while (j < right_start) : (j += 1) {
        const first = j - radius;
        var accum: f32 = 0.0;
        for (kernel, 0..) |coeff, k| {
            accum += coeff * srcp[first + k];
        }
        dstp[j] = accum;
    }

    j = right_start;
    while (j < width) : (j += 1) {
        dstp[j] = blurEdgeColumn(srcp, kernel, j, width);
    }
}

/// Vertical pass of the 9-tap blur for one output row.
///
/// `src` is indexed as `src[tap][column]`: nine row accessors, one per kernel
/// tap, each providing at least `width` samples. For every column the nine
/// samples are weighted by `kernel` (accumulated in tap order 0..8) and the
/// result is stored in `dstp[column]`.
inline fn blur_v(src: anytype, dstp: [*]f32, kernel: [9]f32, width: usize) void {
    for (0..width) |col| {
        var total: f32 = 0.0;
        for (kernel, 0..) |weight, tap| {
            total += weight * src[tap][col];
        }
        dstp[col] = total;
    }
}

/// Blurs a `width` x `height` plane of `f32` samples with a fixed, symmetric
/// 9-tap kernel applied vertically and then horizontally.
///
/// `src` and `dst` are addressed as `row * stride + column`, with `stride`
/// counted in samples. Each output row is built from nine source rows, so
/// `src` and `dst` presumably must not overlap (verify against callers).
///
/// One scratch row of `width` samples is taken from the file-level
/// `allocator` for the duration of the call. The function returns `void` and
/// cannot report failure, so running out of memory panics.
pub inline fn process(src: [*]const f32, dst: [*]f32, stride: usize, width: usize, height: usize) void {
    const kernel = [9]f32{
        0.0076144188642501831054687500,
        0.0360749699175357818603515625,
        0.1095860823988914489746093750,
        0.2134445458650588989257812500,
        0.2665599882602691650390625000,
        0.2134445458650588989257812500,
        0.1095860823988914489746093750,
        0.0360749699175357818603515625,
        0.0076144188642501831054687500,
    };

    const ksize: usize = 9;
    const radius: usize = ksize >> 1;

    // The scratch row has the same size for every image row, so it is
    // allocated once here instead of once per row.
    const tmp_arr = allocator.alignedAlloc(f32, 32, width) catch
        @panic("ssimulacra2: out of memory while allocating the blur scratch row");
    defer allocator.free(tmp_arr);
    const tmp: [*]f32 = tmp_arr.ptr;

    var i: usize = 0;
    while (i < height) : (i += 1) {
        const dist_from_bottom: usize = height - 1 - i;
        var srcp: [9][*]const f32 = undefined;

        // Taps above the centre row: rows that would fall above row 0 are
        // reflected about row 0 and clamped to the last row.
        for (0..radius) |k| {
            const back = radius - k;
            const row: usize = if (i < back) @min(back - i, height - 1) else i - back;
            srcp[k] = src + row * stride;
        }

        // Centre row and taps below it. The in-range case is formed as
        // `i + ahead` so no unsigned intermediate can drop below zero.
        //
        // NOTE(review): rows past the bottom resolve to `i - overshoot`
        // rather than `(height - 1) - overshoot`, which is not a mirror image
        // of the top-edge rule. Kept unchanged so results stay the same;
        // confirm whether this is intended.
        for (radius..ksize) |k| {
            const ahead = k - radius;
            const row: usize = if (dist_from_bottom < ahead)
                i - @min(ahead - dist_from_bottom, i)
            else
                i + ahead;
            srcp[k] = src + row * stride;
        }

        blur_v(srcp, tmp, kernel, width);
        blur_h(tmp, dst + i * stride, kernel, width);
    }
}
32 changes: 32 additions & 0 deletions src/downscale.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/// Downscales three planes by a factor of two in each direction.
///
/// Every output sample is the mean of a 2x2 block of input samples. When
/// `in_w` or `in_h` is odd, the block coordinates are clamped to the last
/// column/row, so the final input column/row is counted twice.
///
/// The output is `ceil(in_w / 2)` x `ceil(in_h / 2)` samples. Source rows are
/// `src_stride` samples apart; destination rows are written
/// `ceil(src_stride / 2)` samples apart. Nothing is written when either input
/// dimension is 0.
pub inline fn process(src: [3][*]f32, dst: [3][*]f32, src_stride: usize, in_w: usize, in_h: usize) void {
    const fscale: f32 = 2.0;
    const uscale: usize = 2;
    const out_w = (in_w + uscale - 1) / uscale;
    const out_h = (in_h + uscale - 1) / uscale;
    const dst_stride = (src_stride + uscale - 1) / uscale;
    const normalize: f32 = 1.0 / (fscale * fscale);

    for (src, dst) |srcp, dst_plane| {
        // Walks down the destination plane one output row at a time.
        var dstp = dst_plane;
        for (0..out_h) |oy| {
            for (0..out_w) |ox| {
                var sum: f32 = 0.0;
                for (0..uscale) |iy| {
                    // Clamp once per block row; it does not depend on `ix`.
                    const y = @min(oy * uscale + iy, in_h - 1);
                    const row = srcp + y * src_stride;
                    for (0..uscale) |ix| {
                        const x = @min(ox * uscale + ix, in_w - 1);
                        sum += row[x];
                    }
                }
                dstp[ox] = sum * normalize;
            }
            dstp += dst_stride;
        }
    }
}
19 changes: 19 additions & 0 deletions src/multiply.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const vec_t: type = @Vector(16, f32);

/// Multiplies the first 16 elements of `src1` and `src2` lane by lane and
/// stores the 16 products in the first 16 elements of `dst`.
inline fn process_vec(src1: anytype, src2: anytype, dst: []f32) void {
    dst[0..16].* = @as(vec_t, src1[0..16].*) * @as(vec_t, src2[0..16].*);
}

/// Element-wise product of two planes: `dst[y][x] = src1[y][x] * src2[y][x]`
/// for every `x < width`, `y < height`.
///
/// All three planes are addressed as `y * stride + x`, with `stride` counted
/// in samples. Full groups of 16 samples are multiplied as vectors; the
/// remaining `width % 16` samples of each row are handled one at a time, so
/// nothing at or beyond column `width` is read or written.
pub inline fn process(src1: [*]const f32, src2: [*]const f32, dst: [*]f32, stride: usize, width: usize, height: usize) void {
    const lanes: usize = 16;
    // Largest multiple of `lanes` that does not exceed `width`.
    const vec_end = width - width % lanes;

    var y: usize = 0;
    while (y < height) : (y += 1) {
        const row = y * stride;
        const srcp1 = src1 + row;
        const srcp2 = src2 + row;
        const dstp = dst + row;

        var x: usize = 0;
        while (x < vec_end) : (x += lanes) {
            const x2: usize = x + lanes;
            process_vec(srcp1[x..x2], srcp2[x..x2], dstp[x..x2]);
        }

        // Scalar tail for widths that are not a multiple of 16.
        while (x < width) : (x += 1) {
            dstp[x] = srcp1[x] * srcp2[x];
        }
    }
}
Loading

0 comments on commit b5d4f7e

Please sign in to comment.