-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mostly because Julia does not create optimal code when doing cyclic bit shifts, see JuliaLang/julia#11592 and JuliaLang/julia#19923
- Loading branch information
Mayeul d'Avezac
committed
May 30, 2017
1 parent
d306c0a
commit 1e966a8
Showing
1 changed file
with
272 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,272 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"rotr_j! (generic function with 1 method)" | ||
] | ||
}, | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"using Cxx, ExtractRandom, BenchmarkTools\n", | ||
"rotr_j(x::Integer, k::Integer) = (x >>> k) | (x << (8sizeof(x) - k))\n", | ||
"rotr_j!{T <: Integer}(x::Vector{T}, k::Vector{T}, out::Vector{T}) = begin\n", | ||
" const N = 8sizeof(T)\n", | ||
" @inbounds for i in eachindex(x)\n", | ||
" const xᵢ = x[i]\n", | ||
" const kᵢ = k[i]\n", | ||
" out[i] = (xᵢ >>> kᵢ) | (xᵢ << (N - kᵢ))\n", | ||
" end\n", | ||
"end" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"rotr_c (generic function with 2 methods)" | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"cxx\"\"\"\n", | ||
"template<class T> T rotr(T x, T k) {\n", | ||
" auto const N = 8 * sizeof(decltype(x));\n", | ||
" return (x >> k) | (x << (N - k));\n", | ||
"}\n", | ||
"\n", | ||
"template<class T> void rotr(T n, T const *a, T const * k, T * out) {\n", | ||
" for(int64_t i(0); i < n; ++i)\n", | ||
" *(out + i) = rotr(*(a + i), *(k + i));\n", | ||
"}\n", | ||
"\"\"\"\n", | ||
"rotr_c(x::AbstractVector, k::AbstractVector, out::AbstractVector) = begin\n", | ||
" @cxx rotr(length(x), pointer(x), pointer(k), pointer(out))\n", | ||
"end\n", | ||
"rotr_c(x::Integer, k::Integer) = @cxx rotr(x, k)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 32 bytes\n", | ||
" allocs estimate: 1\n", | ||
" --------------\n", | ||
" minimum time: 4.782 μs (0.00% GC)\n", | ||
" median time: 4.921 μs (0.00% GC)\n", | ||
" mean time: 5.013 μs (0.00% GC)\n", | ||
" maximum time: 17.294 μs (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 7" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"a, b, c = ones(Int64, 1000), ones(Int64, 1000), zeros(Int64, 1000)\n", | ||
"@benchmark $c .= rotr_j.($a, $b)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 0 bytes\n", | ||
" allocs estimate: 0\n", | ||
" --------------\n", | ||
" minimum time: 4.247 μs (0.00% GC)\n", | ||
" median time: 4.254 μs (0.00% GC)\n", | ||
" mean time: 4.345 μs (0.00% GC)\n", | ||
" maximum time: 12.985 μs (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 7" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"@benchmark rotr_j!($a, $b, $c)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 32 bytes\n", | ||
" allocs estimate: 1\n", | ||
" --------------\n", | ||
" minimum time: 3.383 μs (0.00% GC)\n", | ||
" median time: 3.385 μs (0.00% GC)\n", | ||
" mean time: 3.483 μs (0.00% GC)\n", | ||
" maximum time: 10.842 μs (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 8" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"@benchmark $a .= rotr_c.($b, $c)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 0 bytes\n", | ||
" allocs estimate: 0\n", | ||
" --------------\n", | ||
" minimum time: 1.962 μs (0.00% GC)\n", | ||
" median time: 1.990 μs (0.00% GC)\n", | ||
" mean time: 2.112 μs (0.00% GC)\n", | ||
" maximum time: 13.743 μs (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 10" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"@benchmark rotr_c($a, $b, $c)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 0 bytes\n", | ||
" allocs estimate: 0\n", | ||
" --------------\n", | ||
" minimum time: 1.696 ns (0.00% GC)\n", | ||
" median time: 1.797 ns (0.00% GC)\n", | ||
" mean time: 1.879 ns (0.00% GC)\n", | ||
" maximum time: 15.830 ns (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 1000" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"@benchmark rotr_j(1, 2)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"BenchmarkTools.Trial: \n", | ||
" memory estimate: 0 bytes\n", | ||
" allocs estimate: 0\n", | ||
" --------------\n", | ||
" minimum time: 3.923 ns (0.00% GC)\n", | ||
" median time: 3.929 ns (0.00% GC)\n", | ||
" mean time: 4.016 ns (0.00% GC)\n", | ||
" maximum time: 34.771 ns (0.00% GC)\n", | ||
" --------------\n", | ||
" samples: 10000\n", | ||
" evals/sample: 1000" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"@benchmark rotr_c(1, 2)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Julia 0.5.2", | ||
"language": "julia", | ||
"name": "julia-0.5" | ||
}, | ||
"language_info": { | ||
"file_extension": ".jl", | ||
"mimetype": "application/julia", | ||
"name": "julia", | ||
"version": "0.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |