diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json
index 4ec2921..f4cc931 100644
--- a/dev/.documenter-siteinfo.json
+++ b/dev/.documenter-siteinfo.json
@@ -1 +1 @@
-{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-12T18:42:29","documenter_version":"1.7.0"}}
\ No newline at end of file
+{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-13T17:27:01","documenter_version":"1.8.0"}}
\ No newline at end of file
diff --git a/dev/api/accumulate/index.html b/dev/api/accumulate/index.html
index 00181b7..c050290 100644
--- a/dev/api/accumulate/index.html
+++ b/dev/api/accumulate/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Accumulate · AcceleratedKernels.jl</title><meta name="title" content="Accumulate · AcceleratedKernels.jl"/><meta property="og:title" content="Accumulate · AcceleratedKernels.jl"/><meta property="twitter:title" content="Accumulate · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li class="is-active"><a 
class="tocitem" href>Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Accumulate</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Accumulate</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/accumulate.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" 
href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Accumulate-/-Prefix-Sum-/-Scan"><a class="docs-heading-anchor" href="#Accumulate-/-Prefix-Sum-/-Scan">Accumulate / Prefix Sum / Scan</a><a id="Accumulate-/-Prefix-Sum-/-Scan-1"></a><a class="docs-heading-anchor-permalink" href="#Accumulate-/-Prefix-Sum-/-Scan" title="Permalink"></a></h3><div class="markdown"><p>Compute accumulated running totals along a sequence by applying a binary operator to all elements up to the current one; often used in GPU programming as a first step in finding / extracting subsets of data.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Accumulate · AcceleratedKernels.jl</title><meta name="title" content="Accumulate · AcceleratedKernels.jl"/><meta property="og:title" content="Accumulate · AcceleratedKernels.jl"/><meta property="twitter:title" content="Accumulate · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/accumulate/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li class="is-active"><a 
class="tocitem" href>Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Accumulate</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Accumulate</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/accumulate.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Accumulate-/-Prefix-Sum-/-Scan"><a class="docs-heading-anchor" href="#Accumulate-/-Prefix-Sum-/-Scan">Accumulate / Prefix Sum / Scan</a><a id="Accumulate-/-Prefix-Sum-/-Scan-1"></a><a class="docs-heading-anchor-permalink" href="#Accumulate-/-Prefix-Sum-/-Scan" title="Permalink"></a></h3><div class="markdown"><p>Compute accumulated running totals along a sequence by applying a binary operator to all elements up to the current one; often used in GPU programming as a first step in finding / extracting subsets of data.</p>
 <ul>
 <li><p><code>accumulate&#33;</code> &#40;in-place&#41;, <code>accumulate</code> &#40;allocating&#41;; inclusive or exclusive.</p>
 </li>
@@ -8,11 +8,11 @@
 </ul>
 <p>Function signature:</p>
 <pre><code class="language-julia">accumulate&#33;&#40;op, v::AbstractGPUVector; init, inclusive::Bool&#61;true,
-            block_size::Int&#61;128,
+            block_size::Int&#61;256,
             temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing,
             temp_flags::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
 accumulate&#40;op, v::AbstractGPUVector; init, inclusive::Bool&#61;true,
-           block_size::Int&#61;128,
+           block_size::Int&#61;256,
            temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing,
            temp_flags::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;</code></pre>
 <p>Example computing an inclusive prefix sum &#40;the typical GPU &quot;scan&quot;&#41;:</p>
@@ -22,4 +22,4 @@
 v &#61; oneAPI.ones&#40;Int32, 100_000&#41;
 AK.accumulate&#33;&#40;&#43;, v, init&#61;0&#41;</code></pre>
 <p>The temporaries <code>temp</code> and <code>temp_flags</code> should both have at least <code>&#40;length&#40;v&#41; &#43; 2 * block_size - 1&#41; ÷ &#40;2 * block_size&#41;</code> elements; <code>eltype&#40;v&#41; &#61;&#61;&#61; eltype&#40;temp&#41;</code>; the elements in <code>temp_flags</code> can be any integers, but <code>Int8</code> is used by default to reduce memory usage. </p>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../mapreduce/">« MapReduce</a><a class="docs-footer-nextpage" href="../binarysearch/">Binary Search »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../mapreduce/">« MapReduce</a><a class="docs-footer-nextpage" href="../binarysearch/">Binary Search »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/binarysearch/index.html b/dev/api/binarysearch/index.html
index 96d81ae..c4b5c83 100644
--- a/dev/api/binarysearch/index.html
+++ b/dev/api/binarysearch/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Binary Search · AcceleratedKernels.jl</title><meta name="title" content="Binary Search · AcceleratedKernels.jl"/><meta property="og:title" content="Binary Search · AcceleratedKernels.jl"/><meta property="twitter:title" content="Binary Search · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li class="is-active"><a class="tocitem" href>Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Binary Search</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Binary Search</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/binarysearch.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Binary-Search"><a class="docs-heading-anchor" href="#Binary-Search">Binary Search</a><a id="Binary-Search-1"></a><a class="docs-heading-anchor-permalink" href="#Binary-Search" title="Permalink"></a></h3><div class="markdown"><p>Find the indices where some elements <code>x</code> should be inserted into a sorted sequence <code>v</code> to maintain the sorted order. Effectively applying the Julia.Base functions in parallel on a GPU using <code>foreachindex</code>.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Binary Search · AcceleratedKernels.jl</title><meta name="title" content="Binary Search · AcceleratedKernels.jl"/><meta property="og:title" content="Binary Search · AcceleratedKernels.jl"/><meta property="twitter:title" content="Binary Search · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/binarysearch/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li class="is-active"><a class="tocitem" href>Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Binary Search</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Binary Search</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/binarysearch.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" 
href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Binary-Search"><a class="docs-heading-anchor" href="#Binary-Search">Binary Search</a><a id="Binary-Search-1"></a><a class="docs-heading-anchor-permalink" href="#Binary-Search" title="Permalink"></a></h3><div class="markdown"><p>Find the indices where some elements <code>x</code> should be inserted into a sorted sequence <code>v</code> to maintain the sorted order. Effectively applying the Julia.Base functions in parallel on a GPU using <code>foreachindex</code>.</p>
 <ul>
 <li><p><code>searchsortedfirst&#33;</code> &#40;in-place&#41;, <code>searchsortedfirst</code> &#40;allocating&#41;: index of first element in <code>v</code> &gt;&#61; <code>x&#91;j&#93;</code>.</p>
 </li>
@@ -49,4 +49,4 @@
 ix &#61; MtlArray&#123;Int&#125;&#40;undef, 10_000&#41;
 
 AK.searchsortedfirst&#33;&#40;ix, v, x&#41;</code></pre>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../accumulate/">« Accumulate</a><a class="docs-footer-nextpage" href="../predicates/">Predicates »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../accumulate/">« Accumulate</a><a class="docs-footer-nextpage" href="../predicates/">Predicates »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/custom_structs/index.html b/dev/api/custom_structs/index.html
index 0ef09ee..adc6100 100644
--- a/dev/api/custom_structs/index.html
+++ b/dev/api/custom_structs/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Custom Structs · AcceleratedKernels.jl</title><meta name="title" content="Custom Structs · AcceleratedKernels.jl"/><meta property="og:title" content="Custom Structs · AcceleratedKernels.jl"/><meta property="twitter:title" content="Custom Structs · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li class="is-active"><a class="tocitem" href>Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Custom Structs</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Custom Structs</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/custom_structs.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Custom-Structs"><a class="docs-heading-anchor" href="#Custom-Structs">Custom Structs</a><a id="Custom-Structs-1"></a><a class="docs-heading-anchor-permalink" href="#Custom-Structs" title="Permalink"></a></h3><div class="markdown"><p>As functions are compiled as/when used in Julia for the given argument types &#40;for C&#43;&#43; people: kind of like everything being a template argument by default&#41;, we can use custom structs and functions defined outside AcceleratedKernels.jl, which will be inlined and optimised as if they were hardcoded within the library. Normal Julia functions and code can be used, without special annotations like <code>__device__</code>, <code>KOKKOS_LAMBDA</code> or wrapping them in classes with overloaded <code>operator&#40;&#41;</code>.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Custom Structs · AcceleratedKernels.jl</title><meta name="title" content="Custom Structs · AcceleratedKernels.jl"/><meta property="og:title" content="Custom Structs · AcceleratedKernels.jl"/><meta property="twitter:title" content="Custom Structs · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/custom_structs/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li class="is-active"><a class="tocitem" href>Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Custom Structs</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Custom Structs</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/custom_structs.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" 
href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Custom-Structs"><a class="docs-heading-anchor" href="#Custom-Structs">Custom Structs</a><a id="Custom-Structs-1"></a><a class="docs-heading-anchor-permalink" href="#Custom-Structs" title="Permalink"></a></h3><div class="markdown"><p>As functions are compiled as/when used in Julia for the given argument types &#40;for C&#43;&#43; people: kind of like everything being a template argument by default&#41;, we can use custom structs and functions defined outside AcceleratedKernels.jl, which will be inlined and optimised as if they were hardcoded within the library. Normal Julia functions and code can be used, without special annotations like <code>__device__</code>, <code>KOKKOS_LAMBDA</code> or wrapping them in classes with overloaded <code>operator&#40;&#41;</code>.</p>
 <p>As an example, let&#39;s compute the coordinate-wise minima of some points:</p>
 <pre><code class="language-julia">import AcceleratedKernels as AK
 using Metal
@@ -23,4 +23,4 @@
 points &#61; MtlArray&#40;&#91;Point&#40;rand&#40;&#41;, rand&#40;&#41;&#41; for _ in 1:100_000&#93;&#41;
 @show minima &#61; compute_minima&#40;points&#41;</code></pre>
 <p>Note that we did not have to explicitly type the function arguments in <code>compute_minima</code> - the types would be figured out when calling the function and compiled for the right backend automatically, e.g. CPU, oneAPI, ROCm, CUDA, Metal. Also, we used the standard Julia function <code>min</code>; it was not special-cased anywhere, it&#39;s just KernelAbstractions.jl inlining and compiling normal code, even from within the Julia.Base standard library.</p>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../predicates/">« Predicates</a><a class="docs-footer-nextpage" href="../task_partition/">Task Partitioning »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../predicates/">« Predicates</a><a class="docs-footer-nextpage" href="../task_partition/">Task Partitioning »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/foreachindex/index.html b/dev/api/foreachindex/index.html
index 326006b..85b1217 100644
--- a/dev/api/foreachindex/index.html
+++ b/dev/api/foreachindex/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>General Loops · AcceleratedKernels.jl</title><meta name="title" content="General Loops · AcceleratedKernels.jl"/><meta property="og:title" content="General Loops · AcceleratedKernels.jl"/><meta property="twitter:title" content="General Loops · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li class="is-active"><a class="tocitem" href>General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>General Loops</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>General Loops</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/foreachindex.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="General-Looping"><a class="docs-heading-anchor" href="#General-Looping">General Looping</a><a id="General-Looping-1"></a><a class="docs-heading-anchor-permalink" href="#General-Looping" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.foreachindex" href="#AcceleratedKernels.foreachindex"><code>AcceleratedKernels.foreachindex</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">foreachindex(
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>General Loops · AcceleratedKernels.jl</title><meta name="title" content="General Loops · AcceleratedKernels.jl"/><meta property="og:title" content="General Loops · AcceleratedKernels.jl"/><meta property="twitter:title" content="General Loops · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/foreachindex/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li class="is-active"><a class="tocitem" href>General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>General Loops</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>General Loops</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/foreachindex.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" 
href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="General-Looping"><a class="docs-heading-anchor" href="#General-Looping">General Looping</a><a id="General-Looping-1"></a><a class="docs-heading-anchor-permalink" href="#General-Looping" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.foreachindex" href="#AcceleratedKernels.foreachindex"><code>AcceleratedKernels.foreachindex</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">foreachindex(
     f, itr, backend::Backend=get_backend(itr);
 
     # CPU settings
@@ -14,26 +14,69 @@
 for i in eachindex(x)
     @inbounds y[i] = 2 * x[i] + 1
 end</code></pre><p>Using this function you can have the same for loop body over a GPU array:</p><pre><code class="language-julia hljs">using CUDA
+import AcceleratedKernels as AK
 const x = CuArray(1:100)
 const y = similar(x)
-foreachindex(x) do i
+AK.foreachindex(x) do i
     @inbounds y[i] = 2 * x[i] + 1
 end</code></pre><p>Note that the above code is pure arithmetic, which you can write directly (and on some platforms it may be faster) as:</p><pre><code class="language-julia hljs">using CUDA
 x = CuArray(1:100)
 y = 2 .* x .+ 1</code></pre><p><strong>Important note</strong>: to use this function on a GPU, the objects referenced inside the loop body must have known types - i.e. be inside a function, or <code>const</code> global objects; but you shouldn&#39;t use global objects anyways. For example:</p><pre><code class="language-julia hljs">using oneAPI
+import AcceleratedKernels as AK
 
 x = oneArray(1:100)
 
 # CRASHES - typical error message: &quot;Reason: unsupported dynamic function invocation&quot;
-# foreachindex(x) do i
+# AK.foreachindex(x) do i
 #     x[i] = i
 # end
 
 function somecopy!(v)
     # Because it is inside a function, the type of `v` will be known
-    foreachindex(v) do i
+    AK.foreachindex(v) do i
         v[i] = i
     end
 end
 
-somecopy!(x)    # This works</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/foreachindex.jl#L64-L139">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../using_backends/">« Using Different Backends</a><a class="docs-footer-nextpage" href="../map/">Map »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+somecopy!(x)    # This works</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/foreachindex.jl#L63-L140">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.foraxes" href="#AcceleratedKernels.foraxes"><code>AcceleratedKernels.foraxes</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">foraxes(
+    f, itr, dims::Union{Nothing, &lt;:Integer}=nothing, backend::Backend=get_backend(itr);
+
+    # CPU settings
+    scheduler=:threads,
+    max_tasks=Threads.nthreads(),
+    min_elems=1,
+
+    # GPU settings
+    block_size=256,
+)</code></pre><p>Parallelised <code>for</code> loop over the indices along axis <code>dims</code> of an iterable.</p><p>It allows you to run normal Julia code on a GPU over multiple arrays - e.g. CuArray, ROCArray, MtlArray, oneArray - with one GPU thread per index.</p><p>On CPUs at most <code>max_tasks</code> threads are launched, or fewer such that each thread processes at least <code>min_elems</code> indices; if a single task ends up being needed, <code>f</code> is inlined and no thread is launched. Tune it to your function - the more expensive it is, the fewer elements are needed to amortise the cost of launching a thread (which is a few μs). The scheduler can be <code>:polyester</code> to use Polyester.jl cheap threads or <code>:threads</code> to use normal Julia threads; either can be faster depending on the function, but in general the latter is more composable.</p><p><strong>Examples</strong></p><p>Normally you would write a for loop like this:</p><pre><code class="language-julia hljs">x = Array(reshape(1:30, 3, 10))
+y = similar(x)
+for i in axes(x, 2)
+    for j in axes(x, 1)
+        @inbounds y[j, i] = 2 * x[j, i] + 1
+    end
+end</code></pre><p>Using this function you can have the same for loop body over a GPU array:</p><pre><code class="language-julia hljs">using CUDA
+import AcceleratedKernels as AK
+const x = CuArray(reshape(1:3000, 3, 1000))
+const y = similar(x)
+AK.foraxes(x, 2) do i
+    for j in axes(x, 1)
+        @inbounds y[j, i] = 2 * x[j, i] + 1
+    end
+end</code></pre><p><strong>Important note</strong>: to use this function on a GPU, the objects referenced inside the loop body must have known types - i.e. they must be defined inside a function, or be <code>const</code> global objects (though you shouldn&#39;t use global objects anyway). For example:</p><pre><code class="language-julia hljs">using oneAPI
+import AcceleratedKernels as AK
+
+x = oneArray(reshape(1:3000, 3, 1000))
+
+# CRASHES - typical error message: &quot;Reason: unsupported dynamic function invocation&quot;
+# AK.foraxes(x) do i
+#     x[i] = i
+# end
+
+function somecopy!(v)
+    # Because it is inside a function, the type of `v` will be known
+    AK.foraxes(v) do i
+        v[i] = i
+    end
+end
+
+somecopy!(x)    # This works</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/foreachindex.jl#L170-L243">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../using_backends/">« Using Different Backends</a><a class="docs-footer-nextpage" href="../map/">Map »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/map/index.html b/dev/api/map/index.html
index aabdaaa..6f27b86 100644
--- a/dev/api/map/index.html
+++ b/dev/api/map/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Map · AcceleratedKernels.jl</title><meta name="title" content="Map · AcceleratedKernels.jl"/><meta property="og:title" content="Map · AcceleratedKernels.jl"/><meta property="twitter:title" content="Map · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li class="is-active"><a class="tocitem" href>Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Map</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Map</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/map.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" 
title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Map"><a class="docs-heading-anchor" href="#Map">Map</a><a id="Map-1"></a><a class="docs-heading-anchor-permalink" href="#Map" title="Permalink"></a></h3><div class="markdown"><p>Parallel mapping of a function over each element of an iterable via <code>foreachindex</code>:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Map · AcceleratedKernels.jl</title><meta name="title" content="Map · AcceleratedKernels.jl"/><meta property="og:title" content="Map · AcceleratedKernels.jl"/><meta property="twitter:title" content="Map · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/map/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li class="is-active"><a class="tocitem" href>Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Map</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Map</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/map.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" 
id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Map"><a class="docs-heading-anchor" href="#Map">Map</a><a id="Map-1"></a><a class="docs-heading-anchor-permalink" href="#Map" title="Permalink"></a></h3><div class="markdown"><p>Parallel mapping of a function over each element of an iterable via <code>foreachindex</code>:</p>
 <ul>
 <li><p><code>map&#33;</code> &#40;in-place&#41;, <code>map</code> &#40;out-of-place&#41;</p>
 </li>
@@ -36,4 +36,4 @@
 
     # GPU settings
     block_size=256,    
-)</code></pre><p>Apply the function <code>f</code> to each element of <code>src</code> and store the result in <code>dst</code>. The CPU and GPU settings are the same as for <a href="../foreachindex/#AcceleratedKernels.foreachindex"><code>foreachindex</code></a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/map.jl#L1-L16">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../foreachindex/">« General Loops</a><a class="docs-footer-nextpage" href="../sort/">Sorting »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+)</code></pre><p>Apply the function <code>f</code> to each element of <code>src</code> and store the result in <code>dst</code>. The CPU and GPU settings are the same as for <a href="../foreachindex/#AcceleratedKernels.foreachindex"><code>foreachindex</code></a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/map.jl#L1-L16">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../foreachindex/">« General Loops</a><a class="docs-footer-nextpage" href="../sort/">Sorting »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/mapreduce/index.html b/dev/api/mapreduce/index.html
index e9046e2..1cb022a 100644
--- a/dev/api/mapreduce/index.html
+++ b/dev/api/mapreduce/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>MapReduce · AcceleratedKernels.jl</title><meta name="title" content="MapReduce · AcceleratedKernels.jl"/><meta property="og:title" content="MapReduce · AcceleratedKernels.jl"/><meta property="twitter:title" content="MapReduce · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li class="is-active"><a class="tocitem" href>MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>MapReduce</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>MapReduce</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/mapreduce.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" 
href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="MapReduce"><a class="docs-heading-anchor" href="#MapReduce">MapReduce</a><a id="MapReduce-1"></a><a class="docs-heading-anchor-permalink" href="#MapReduce" title="Permalink"></a></h3><div class="markdown"><p>Equivalent to <code>reduce&#40;op, map&#40;f, iterable&#41;&#41;</code>, without saving the intermediate mapped collection; can be used to e.g. split documents into words &#40;map&#41; and count the frequency thereof &#40;reduce&#41;.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>MapReduce · AcceleratedKernels.jl</title><meta name="title" content="MapReduce · AcceleratedKernels.jl"/><meta property="og:title" content="MapReduce · AcceleratedKernels.jl"/><meta property="twitter:title" content="MapReduce · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/mapreduce/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li class="is-active"><a class="tocitem" href>MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>MapReduce</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>MapReduce</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/mapreduce.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid 
fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="MapReduce"><a class="docs-heading-anchor" href="#MapReduce">MapReduce</a><a id="MapReduce-1"></a><a class="docs-heading-anchor-permalink" href="#MapReduce" title="Permalink"></a></h3><div class="markdown"><p>Equivalent to <code>reduce&#40;op, map&#40;f, iterable&#41;&#41;</code>, without saving the intermediate mapped collection; can be used to e.g. split documents into words &#40;map&#41; and count the frequency thereof &#40;reduce&#41;.</p>
 <ul>
 <li><p><strong>Other names</strong>: <code>transform_reduce</code>, some <code>fold</code> implementations include the mapping function too.</p>
 </li>
@@ -52,4 +52,4 @@
 f(x) = x * x
 m = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))
 mrowsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=1)
-mcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/reduce/reduce.jl#L139-L201">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../reduce/">« Reduce</a><a class="docs-footer-nextpage" href="../accumulate/">Accumulate »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+mcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/reduce/reduce.jl#L139-L201">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../reduce/">« Reduce</a><a class="docs-footer-nextpage" href="../accumulate/">Accumulate »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/predicates/index.html b/dev/api/predicates/index.html
index fe43144..449194a 100644
--- a/dev/api/predicates/index.html
+++ b/dev/api/predicates/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Predicates · AcceleratedKernels.jl</title><meta name="title" content="Predicates · AcceleratedKernels.jl"/><meta property="og:title" content="Predicates · AcceleratedKernels.jl"/><meta property="twitter:title" content="Predicates · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li class="is-active"><a class="tocitem" href>Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Predicates</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Predicates</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/predicates.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" 
href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Predicates"><a class="docs-heading-anchor" href="#Predicates">Predicates</a><a id="Predicates-1"></a><a class="docs-heading-anchor-permalink" href="#Predicates" title="Permalink"></a></h3><div class="markdown"><p>Apply a predicate to check if all / any elements in a collection return true. Could be implemented as a reduction, but is better optimised with stopping the search once a false / true is found.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Predicates · AcceleratedKernels.jl</title><meta name="title" content="Predicates · AcceleratedKernels.jl"/><meta property="og:title" content="Predicates · AcceleratedKernels.jl"/><meta property="twitter:title" content="Predicates · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/predicates/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li class="is-active"><a class="tocitem" href>Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Predicates</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Predicates</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/predicates.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Predicates"><a class="docs-heading-anchor" href="#Predicates">Predicates</a><a id="Predicates-1"></a><a class="docs-heading-anchor-permalink" href="#Predicates" title="Permalink"></a></h3><div class="markdown"><p>Apply a predicate to check if all / any elements in a collection return true. Could be implemented as a reduction, but is better optimised with stopping the search once a false / true is found.</p>
 <ul>
 <li><p><strong>Other names</strong>: not often implemented standalone on GPUs, typically included as part of a reduction.</p>
 </li>
@@ -16,4 +16,4 @@
 v &#61; CuArray&#40;rand&#40;Float32, 100_000&#41;&#41;
 AK.any&#40;x -&gt; x &lt; 1, v&#41;
 AK.all&#40;x -&gt; x &gt; 0, v&#41;</code></pre>
-</div><p><strong>Note on the <code>cooperative</code> keyword</strong>: some older platforms crash when multiple threads write to the same memory location in a global array (e.g. old Intel Graphics); if all threads were to write the same value, it is well-defined on others (e.g. CUDA F4.2 says &quot;If a non-atomic instruction executed by a warp writes to the same location in global memory for more than one of the threads of the warp, only one thread performs a write and which thread does it is undefined.&quot;). This &quot;cooperative&quot; thread behaviour allows for a faster implementation; if you have a platform - the only one I know is Intel UHD Graphics - that crashes, set <code>cooperative=false</code> to use a safer <code>mapreduce</code>-based implementation.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../binarysearch/">« Binary Search</a><a class="docs-footer-nextpage" href="../custom_structs/">Custom Structs »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated 
with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div><p><strong>Note on the <code>cooperative</code> keyword</strong>: some older platforms crash when multiple threads write to the same memory location in a global array (e.g. old Intel Graphics); if all threads were to write the same value, it is well-defined on others (e.g. CUDA F4.2 says &quot;If a non-atomic instruction executed by a warp writes to the same location in global memory for more than one of the threads of the warp, only one thread performs a write and which thread does it is undefined.&quot;). This &quot;cooperative&quot; thread behaviour allows for a faster implementation; if you have a platform - the only one I know is Intel UHD Graphics - that crashes, set <code>cooperative=false</code> to use a safer <code>mapreduce</code>-based implementation.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../binarysearch/">« Binary Search</a><a class="docs-footer-nextpage" href="../custom_structs/">Custom Structs »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated 
with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/reduce/index.html b/dev/api/reduce/index.html
index cd6f93d..e559982 100644
--- a/dev/api/reduce/index.html
+++ b/dev/api/reduce/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Reduce · AcceleratedKernels.jl</title><meta name="title" content="Reduce · AcceleratedKernels.jl"/><meta property="og:title" content="Reduce · AcceleratedKernels.jl"/><meta property="twitter:title" content="Reduce · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li class="is-active"><a class="tocitem" href>Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a 
class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Reduce</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Reduce</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/reduce.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" 
href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Reductions"><a class="docs-heading-anchor" href="#Reductions">Reductions</a><a id="Reductions-1"></a><a class="docs-heading-anchor-permalink" href="#Reductions" title="Permalink"></a></h3><div class="markdown"><p>Apply a custom binary operator reduction on all elements in an iterable; can be used to compute minima, sums, counts, etc.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Reduce · AcceleratedKernels.jl</title><meta name="title" content="Reduce · AcceleratedKernels.jl"/><meta property="og:title" content="Reduce · AcceleratedKernels.jl"/><meta property="twitter:title" content="Reduce · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/reduce/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li class="is-active"><a class="tocitem" href>Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a 
class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Reduce</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Reduce</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/reduce.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Reductions"><a class="docs-heading-anchor" href="#Reductions">Reductions</a><a id="Reductions-1"></a><a class="docs-heading-anchor-permalink" href="#Reductions" title="Permalink"></a></h3><div class="markdown"><p>Apply a custom binary operator reduction on all elements in an iterable; can be used to compute minima, sums, counts, etc.</p>
 <ul>
 <li><p><strong>Other names</strong>: <code>Kokkos:parallel_reduce</code>, <code>fold</code>, <code>aggregate</code>.</p>
 </li>
@@ -55,4 +55,4 @@
 
 m = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))
 mrowsum = AK.reduce(+, m; init=zero(eltype(m)), dims=1)
-mcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/reduce/reduce.jl#L8-L69">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../sort/">« Sorting</a><a class="docs-footer-nextpage" href="../mapreduce/">MapReduce »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+mcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/reduce/reduce.jl#L8-L69">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../sort/">« Sorting</a><a class="docs-footer-nextpage" href="../mapreduce/">MapReduce »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/sort/index.html b/dev/api/sort/index.html
index bffea67..ddc87e0 100644
--- a/dev/api/sort/index.html
+++ b/dev/api/sort/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Sorting · AcceleratedKernels.jl</title><meta name="title" content="Sorting · AcceleratedKernels.jl"/><meta property="og:title" content="Sorting · AcceleratedKernels.jl"/><meta property="twitter:title" content="Sorting · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li class="is-active"><a class="tocitem" href>Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a 
class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Sorting</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Sorting</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/sort.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" 
href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="sort-and-friends"><a class="docs-heading-anchor" href="#sort-and-friends"><code>sort</code> and friends</a><a id="sort-and-friends-1"></a><a class="docs-heading-anchor-permalink" href="#sort-and-friends" title="Permalink"></a></h3><div class="markdown"><p>Sorting algorithms with similar interface and default settings as the Julia Base ones, on GPUs:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Sorting · AcceleratedKernels.jl</title><meta name="title" content="Sorting · AcceleratedKernels.jl"/><meta property="og:title" content="Sorting · AcceleratedKernels.jl"/><meta property="twitter:title" content="Sorting · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/sort/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li class="is-active"><a class="tocitem" href>Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a 
class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Sorting</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Sorting</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/sort.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="sort-and-friends"><a class="docs-heading-anchor" href="#sort-and-friends"><code>sort</code> and friends</a><a id="sort-and-friends-1"></a><a class="docs-heading-anchor-permalink" href="#sort-and-friends" title="Permalink"></a></h3><div class="markdown"><p>Sorting algorithms with similar interface and default settings as the Julia Base ones, on GPUs:</p>
 <ul>
 <li><p><code>sort&#33;</code> &#40;in-place&#41;, <code>sort</code> &#40;out-of-place&#41;</p>
 </li>
@@ -11,11 +11,11 @@
 <p>Function signature:</p>
 <pre><code class="language-julia">sort&#33;&#40;v::AbstractGPUVector;
       lt&#61;isless, by&#61;identity, rev::Bool&#61;false, order::Base.Order.Ordering&#61;Base.Order.Forward,
-      block_size::Int&#61;128, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
+      block_size::Int&#61;256, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
 
 sortperm&#33;&#40;ix::AbstractGPUVector, v::AbstractGPUVector;
           lt&#61;isless, by&#61;identity, rev::Bool&#61;false, order::Base.Order.Ordering&#61;Base.Order.Forward,
-          block_size::Int&#61;128, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;</code></pre>
+          block_size::Int&#61;256, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;</code></pre>
 <p>Specific implementations that the interfaces above forward to:</p>
 <ul>
 <li><p><code>merge_sort&#33;</code> &#40;in-place&#41;, <code>merge_sort</code> &#40;out-of-place&#41; - sort arbitrary objects with custom comparisons.</p>
@@ -28,23 +28,23 @@
 <p>Function signature:</p>
 <pre><code class="language-julia">merge_sort&#33;&#40;v::AbstractGPUVector;
             lt&#61;&#40;&lt;&#41;, by&#61;identity, rev::Bool&#61;false, order::Ordering&#61;Forward,
-            block_size::Int&#61;128, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
+            block_size::Int&#61;256, temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
 
 merge_sort_by_key&#33;&#40;keys::AbstractGPUVector, values::AbstractGPUVector;
                    lt&#61;&#40;&lt;&#41;, by&#61;identity, rev::Bool&#61;false, order::Ordering&#61;Forward,
-                   block_size::Int&#61;128,
+                   block_size::Int&#61;256,
                    temp_keys::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing,
                    temp_values::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
 
 merge_sortperm&#33;&#40;ix::AbstractGPUVector, v::AbstractGPUVector;
                 lt&#61;&#40;&lt;&#41;, by&#61;identity, rev::Bool&#61;false, order::Ordering&#61;Forward,
-                inplace::Bool&#61;false, block_size::Int&#61;128,
+                inplace::Bool&#61;false, block_size::Int&#61;256,
                 temp_ix::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing,
                 temp_v::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;
 
 merge_sortperm_lowmem&#33;&#40;ix::AbstractGPUVector, v::AbstractGPUVector;
                        lt&#61;&#40;&lt;&#41;, by&#61;identity, rev::Bool&#61;false, order::Ordering&#61;Forward,
-                       block_size::Int&#61;128,
+                       block_size::Int&#61;256,
                        temp::Union&#123;Nothing, AbstractGPUVector&#125;&#61;nothing&#41;</code></pre>
 <p>Example:</p>
 <pre><code class="language-julia">import AcceleratedKernels as AK
@@ -56,4 +56,4 @@
 <pre><code class="language-julia">v &#61; ROCArray&#40;rand&#40;Float32, 100_000&#41;&#41;
 temp &#61; similar&#40;v&#41;
 AK.sort&#33;&#40;v, temp&#61;temp&#41;</code></pre>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../map/">« Map</a><a class="docs-footer-nextpage" href="../reduce/">Reduce »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../map/">« Map</a><a class="docs-footer-nextpage" href="../reduce/">Reduce »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/task_partition/index.html b/dev/api/task_partition/index.html
index cd9a63d..1160b3a 100644
--- a/dev/api/task_partition/index.html
+++ b/dev/api/task_partition/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Task Partitioning · AcceleratedKernels.jl</title><meta name="title" content="Task Partitioning · AcceleratedKernels.jl"/><meta property="og:title" content="Task Partitioning · AcceleratedKernels.jl"/><meta property="twitter:title" content="Task Partitioning · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li class="is-active"><a class="tocitem" href>Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Task Partitioning</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Task Partitioning</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/task_partition.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Multithreaded-Task-Partitioning"><a class="docs-heading-anchor" href="#Multithreaded-Task-Partitioning">Multithreaded Task Partitioning</a><a id="Multithreaded-Task-Partitioning-1"></a><a class="docs-heading-anchor-permalink" href="#Multithreaded-Task-Partitioning" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.TaskPartitioner" href="#AcceleratedKernels.TaskPartitioner"><code>AcceleratedKernels.TaskPartitioner</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">struct TaskPartitioner</code></pre><p>Partitioning <code>num_elems</code> elements / jobs over maximum <code>max_tasks</code> tasks with minimum <code>min_elems</code> elements per task.</p><p><strong>Methods</strong></p><pre><code class="nohighlight hljs">TaskPartitioner(num_elems, max_tasks=Threads.nthreads(), min_elems=1)</code></pre><p><strong>Fields</strong></p><ul><li><p><code>num_elems::Int64</code></p></li><li><p><code>max_tasks::Int64</code></p></li><li><p><code>min_elems::Int64</code></p></li><li><p><code>num_tasks::Int64</code></p></li><li><p><code>task_istarts::Vector{Int64}</code></p></li></ul><p><strong>Examples</strong></p><pre><code class="language-julia hljs">using AcceleratedKernels: TaskPartitioner
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Task Partitioning · AcceleratedKernels.jl</title><meta name="title" content="Task Partitioning · AcceleratedKernels.jl"/><meta property="og:title" content="Task Partitioning · AcceleratedKernels.jl"/><meta property="twitter:title" content="Task Partitioning · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/task_partition/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script 
src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" 
href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li class="is-active"><a class="tocitem" href>Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Task Partitioning</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Task Partitioning</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/task_partition.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Multithreaded-Task-Partitioning"><a class="docs-heading-anchor" href="#Multithreaded-Task-Partitioning">Multithreaded Task Partitioning</a><a id="Multithreaded-Task-Partitioning-1"></a><a class="docs-heading-anchor-permalink" href="#Multithreaded-Task-Partitioning" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.TaskPartitioner" href="#AcceleratedKernels.TaskPartitioner"><code>AcceleratedKernels.TaskPartitioner</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">struct TaskPartitioner</code></pre><p>Partitioning <code>num_elems</code> elements / jobs over maximum <code>max_tasks</code> tasks with minimum <code>min_elems</code> elements per task.</p><p><strong>Methods</strong></p><pre><code class="nohighlight hljs">TaskPartitioner(num_elems, max_tasks=Threads.nthreads(), min_elems=1)</code></pre><p><strong>Fields</strong></p><ul><li><p><code>num_elems::Int64</code></p></li><li><p><code>max_tasks::Int64</code></p></li><li><p><code>min_elems::Int64</code></p></li><li><p><code>num_tasks::Int64</code></p></li><li><p><code>task_istarts::Vector{Int64}</code></p></li></ul><p><strong>Examples</strong></p><pre><code class="language-julia hljs">using AcceleratedKernels: TaskPartitioner
 
 # Divide 10 elements between 4 tasks
 tp = TaskPartitioner(10, 4)
@@ -24,7 +24,7 @@
 tp[i] = 1:5
 tp[i] = 6:10
 tp[i] = 11:15
-tp[i] = 16:20</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/task_partitioner.jl#L1-L47">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.task_partition" href="#AcceleratedKernels.task_partition"><code>AcceleratedKernels.task_partition</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">task_partition(f, num_elems, max_tasks=Threads.nthreads(), min_elems=1)
+tp[i] = 16:20</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/task_partitioner.jl#L1-L47">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.task_partition" href="#AcceleratedKernels.task_partition"><code>AcceleratedKernels.task_partition</code></a> — <span class="docstring-category">Function</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">task_partition(f, num_elems, max_tasks=Threads.nthreads(), min_elems=1)
 task_partition(f, tp::TaskPartitioner)</code></pre><p>Partition <code>num_elems</code> jobs across at most <code>num_tasks</code> parallel tasks with at least <code>min_elems</code> per task, calling <code>f(start_index:end_index)</code>, where the indices are between 1 and <code>num_elems</code>.</p><p><strong>Examples</strong></p><p>A toy example showing outputs:</p><pre><code class="language-julia hljs">num_elems = 4
 task_partition(println, num_elems)
 
@@ -34,4 +34,4 @@
 2:2
 3:3</code></pre><p>This function is probably most useful with a do-block, e.g.:</p><pre><code class="language-julia hljs">task_partition(4) do irange
     some_long_computation(param1, param2, irange)
-end</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/a3be263fb8dcc0e233ff46f4c5ec29f11191ff33/src/task_partitioner.jl#L114-L140">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../custom_structs/">« Custom Structs</a><a class="docs-footer-nextpage" href="../../testing/">Testing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+end</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/task_partitioner.jl#L114-L140">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../custom_structs/">« Custom Structs</a><a class="docs-footer-nextpage" href="../utilities/">Utilities »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/using_backends/index.html b/dev/api/using_backends/index.html
index c5d901b..895884d 100644
--- a/dev/api/using_backends/index.html
+++ b/dev/api/using_backends/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Using Different Backends · AcceleratedKernels.jl</title><meta name="title" content="Using Different Backends · AcceleratedKernels.jl"/><meta property="og:title" content="Using Different Backends · AcceleratedKernels.jl"/><meta property="twitter:title" content="Using Different Backends · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script 
src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li class="is-active"><a class="tocitem" href>Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" 
href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Using Different Backends</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Using Different Backends</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/using_backends.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Using-Different-Backends"><a class="docs-heading-anchor" href="#Using-Different-Backends">Using Different Backends</a><a id="Using-Different-Backends-1"></a><a class="docs-heading-anchor-permalink" href="#Using-Different-Backends" title="Permalink"></a></h3><div class="markdown"><p>For any of the examples below, simply use a different GPU array and AcceleratedKernels.jl will pick the right backend:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Using Different Backends · AcceleratedKernels.jl</title><meta name="title" content="Using Different Backends · AcceleratedKernels.jl"/><meta property="og:title" content="Using Different Backends · AcceleratedKernels.jl"/><meta property="twitter:title" content="Using Different Backends · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/using_backends/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script 
src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li class="is-active"><a class="tocitem" href>Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" 
href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Using Different Backends</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Using Different Backends</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/using_backends.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a 
class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Using-Different-Backends"><a class="docs-heading-anchor" href="#Using-Different-Backends">Using Different Backends</a><a id="Using-Different-Backends-1"></a><a class="docs-heading-anchor-permalink" href="#Using-Different-Backends" title="Permalink"></a></h3><div class="markdown"><p>For any of the examples below, simply use a different GPU array and AcceleratedKernels.jl will pick the right backend:</p>
 <pre><code class="language-julia"># Intel Graphics
 using oneAPI
 v &#61; oneArray&#123;Int32&#125;&#40;undef, 100_000&#41;             # Empty array
@@ -24,4 +24,4 @@
 AK.reduce&#40;&#43;, v, max_tasks&#61;Threads.nthreads&#40;&#41;&#41;</code></pre>
 <p>Note the <code>reduce</code> and <code>mapreduce</code> CPU implementations forward arguments to <a href="https://github.com/JuliaFolds2/OhMyThreads.jl">OhMyThreads.jl</a>, an excellent package for multithreading. The focus of AcceleratedKernels.jl is to provide a unified interface to high-performance implementations of common algorithmic kernels, for both CPUs and GPUs - if you need fine-grained control over threads, scheduling, communication for specialised algorithms &#40;e.g. with highly unequal workloads&#41;, consider using <a href="https://github.com/JuliaFolds2/OhMyThreads.jl">OhMyThreads.jl</a> or <a href="https://github.com/JuliaGPU/KernelAbstractions.jl">KernelAbstractions.jl</a> directly.</p>
 <p>There is ongoing work on multithreaded CPU <code>sort</code> and <code>accumulate</code> implementations - at the moment, they fall back to single-threaded algorithms; the rest of the library is fully parallelised for both CPUs and GPUs.</p>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../performance/">« Performance Tips</a><a class="docs-footer-nextpage" href="../foreachindex/">General Loops »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../performance/">« Performance Tips</a><a class="docs-footer-nextpage" href="../foreachindex/">General Loops »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/api/utilities/index.html b/dev/api/utilities/index.html
new file mode 100644
index 0000000..e93e3dc
--- /dev/null
+++ b/dev/api/utilities/index.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Utilities · AcceleratedKernels.jl</title><meta name="title" content="Utilities · AcceleratedKernels.jl"/><meta property="og:title" content="Utilities · AcceleratedKernels.jl"/><meta property="twitter:title" content="Utilities · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/utilities/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/api/utilities/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/api/utilities/"/><script data-outdated-warner src="../../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../search_index.js"></script><script src="../../siteinfo.js"></script><script 
src="../../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../../"><img src="../../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../../">Overview</a></li><li><a class="tocitem" href="../../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../foreachindex/">General Loops</a></li><li><a class="tocitem" href="../map/">Map</a></li><li><a class="tocitem" href="../sort/">Sorting</a></li><li><a class="tocitem" href="../reduce/">Reduce</a></li><li><a class="tocitem" href="../mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../accumulate/">Accumulate</a></li><li><a class="tocitem" href="../binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../predicates/">Predicates</a></li><li><a class="tocitem" href="../custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../task_partition/">Task Partitioning</a></li><li class="is-active"><a class="tocitem" href>Utilities</a></li></ul></li><li><a class="tocitem" href="../../testing/">Testing</a></li><li><a class="tocitem" href="../../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Manual</a></li><li class="is-active"><a href>Utilities</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Utilities</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/api/utilities.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h3 id="Utilities"><a class="docs-heading-anchor" href="#Utilities">Utilities</a><a id="Utilities-1"></a><a class="docs-heading-anchor-permalink" href="#Utilities" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="AcceleratedKernels.TypeWrap" href="#AcceleratedKernels.TypeWrap"><code>AcceleratedKernels.TypeWrap</code></a> — <span class="docstring-category">Type</span><span class="is-flex-grow-1 docstring-article-toggle-button" title="Collapse docstring"></span></header><section><div><pre><code class="language-julia hljs">struct TypeWrap{T} end
+TypeWrap(T) = TypeWrap{T}()
+Base.:*(x::Number, ::TypeWrap{T}) where T = T(x)</code></pre><p>Allow type conversion via multiplication, like <code>5i32</code> for <code>5 * i32</code> where <code>i32</code> is a <code>TypeWrap</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia hljs">import AcceleratedKernels as AK
+u32 = AK.TypeWrap{UInt32}()
+println(typeof(5u32))
+
+# output
+UInt32</code></pre><p>This is used e.g. to set integer literals inside kernels as u16 to ensure no indices are promoted beyond the index base type.</p><p>For example, Metal uses <code>UInt32</code> indices, but if it is mixed with a Julia integer literal (<code>Int64</code> by default) like in <code>src[ithread + 1]</code>, we incur a type cast to <code>Int64</code>. Instead, we can use <code>src[ithread + 1u16]</code> or <code>src[ithread + 0x1]</code> to ensure the index is <code>UInt32</code> and avoid the cast; as the integer literal <code>1u16</code> has a shorter type than <code>ithread</code>, it is automatically promoted (at compile time) to the <code>ithread</code> type, whether <code>ithread</code> is signed or unsigned as per the backend.</p><pre><code class="language-julia hljs"># Defaults defined
+1u8, 2u16, 3u32, 4u64
+5i8, 6i16, 7i32, 8i64</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/cb75d53e786033327c45d3d9a911701a4bc63446/src/utils.jl#L6-L37">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../task_partition/">« Task Partitioning</a><a class="docs-footer-nextpage" href="../../testing/">Testing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/assets/documenter.js b/dev/assets/documenter.js
index 82252a1..7d68cd8 100644
--- a/dev/assets/documenter.js
+++ b/dev/assets/documenter.js
@@ -612,176 +612,194 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
   };
 }
 
-// `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript!
-const filters = [
-  ...new Set(documenterSearchIndex["docs"].map((x) => x.category)),
-];
-const worker_str =
-  "(" +
-  worker_function.toString() +
-  ")(" +
-  JSON.stringify(documenterSearchIndex["docs"]) +
-  "," +
-  JSON.stringify(documenterBaseURL) +
-  "," +
-  JSON.stringify(filters) +
-  ")";
-const worker_blob = new Blob([worker_str], { type: "text/javascript" });
-const worker = new Worker(URL.createObjectURL(worker_blob));
-
 /////// SEARCH MAIN ///////
 
-// Whether the worker is currently handling a search. This is a boolean
-// as the worker only ever handles 1 or 0 searches at a time.
-var worker_is_running = false;
-
-// The last search text that was sent to the worker. This is used to determine
-// if the worker should be launched again when it reports back results.
-var last_search_text = "";
-
-// The results of the last search. This, in combination with the state of the filters
-// in the DOM, is used compute the results to display on calls to update_search.
-var unfiltered_results = [];
-
-// Which filter is currently selected
-var selected_filter = "";
-
-$(document).on("input", ".documenter-search-input", function (event) {
-  if (!worker_is_running) {
-    launch_search();
-  }
-});
-
-function launch_search() {
-  worker_is_running = true;
-  last_search_text = $(".documenter-search-input").val();
-  worker.postMessage(last_search_text);
-}
-
-worker.onmessage = function (e) {
-  if (last_search_text !== $(".documenter-search-input").val()) {
-    launch_search();
-  } else {
-    worker_is_running = false;
-  }
-
-  unfiltered_results = e.data;
-  update_search();
-};
+function runSearchMainCode() {
+  // `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript!
+  const filters = [
+    ...new Set(documenterSearchIndex["docs"].map((x) => x.category)),
+  ];
+  const worker_str =
+    "(" +
+    worker_function.toString() +
+    ")(" +
+    JSON.stringify(documenterSearchIndex["docs"]) +
+    "," +
+    JSON.stringify(documenterBaseURL) +
+    "," +
+    JSON.stringify(filters) +
+    ")";
+  const worker_blob = new Blob([worker_str], { type: "text/javascript" });
+  const worker = new Worker(URL.createObjectURL(worker_blob));
+
+  // Whether the worker is currently handling a search. This is a boolean
+  // as the worker only ever handles 1 or 0 searches at a time.
+  var worker_is_running = false;
+
+  // The last search text that was sent to the worker. This is used to determine
+  // if the worker should be launched again when it reports back results.
+  var last_search_text = "";
+
+  // The results of the last search. This, in combination with the state of the filters
+  // in the DOM, is used to compute the results to display on calls to update_search.
+  var unfiltered_results = [];
+
+  // Which filter is currently selected
+  var selected_filter = "";
+
+  $(document).on("input", ".documenter-search-input", function (event) {
+    if (!worker_is_running) {
+      launch_search();
+    }
+  });
 
-$(document).on("click", ".search-filter", function () {
-  if ($(this).hasClass("search-filter-selected")) {
-    selected_filter = "";
-  } else {
-    selected_filter = $(this).text().toLowerCase();
+  function launch_search() {
+    worker_is_running = true;
+    last_search_text = $(".documenter-search-input").val();
+    worker.postMessage(last_search_text);
   }
 
-  // This updates search results and toggles classes for UI:
-  update_search();
-});
+  worker.onmessage = function (e) {
+    if (last_search_text !== $(".documenter-search-input").val()) {
+      launch_search();
+    } else {
+      worker_is_running = false;
+    }
 
-/**
- * Make/Update the search component
- */
-function update_search() {
-  let querystring = $(".documenter-search-input").val();
+    unfiltered_results = e.data;
+    update_search();
+  };
 
-  if (querystring.trim()) {
-    if (selected_filter == "") {
-      results = unfiltered_results;
+  $(document).on("click", ".search-filter", function () {
+    if ($(this).hasClass("search-filter-selected")) {
+      selected_filter = "";
     } else {
-      results = unfiltered_results.filter((result) => {
-        return selected_filter == result.category.toLowerCase();
-      });
+      selected_filter = $(this).text().toLowerCase();
     }
 
-    let search_result_container = ``;
-    let modal_filters = make_modal_body_filters();
-    let search_divider = `<div class="search-divider w-100"></div>`;
+    // This updates search results and toggles classes for UI:
+    update_search();
+  });
 
-    if (results.length) {
-      let links = [];
-      let count = 0;
-      let search_results = "";
-
-      for (var i = 0, n = results.length; i < n && count < 200; ++i) {
-        let result = results[i];
-        if (result.location && !links.includes(result.location)) {
-          search_results += result.div;
-          count++;
-          links.push(result.location);
-        }
-      }
+  /**
+   * Make/Update the search component
+   */
+  function update_search() {
+    let querystring = $(".documenter-search-input").val();
 
-      if (count == 1) {
-        count_str = "1 result";
-      } else if (count == 200) {
-        count_str = "200+ results";
+    if (querystring.trim()) {
+      if (selected_filter == "") {
+        results = unfiltered_results;
       } else {
-        count_str = count + " results";
+        results = unfiltered_results.filter((result) => {
+          return selected_filter == result.category.toLowerCase();
+        });
       }
-      let result_count = `<div class="is-size-6">${count_str}</div>`;
 
-      search_result_container = `
+      let search_result_container = ``;
+      let modal_filters = make_modal_body_filters();
+      let search_divider = `<div class="search-divider w-100"></div>`;
+
+      if (results.length) {
+        let links = [];
+        let count = 0;
+        let search_results = "";
+
+        for (var i = 0, n = results.length; i < n && count < 200; ++i) {
+          let result = results[i];
+          if (result.location && !links.includes(result.location)) {
+            search_results += result.div;
+            count++;
+            links.push(result.location);
+          }
+        }
+
+        if (count == 1) {
+          count_str = "1 result";
+        } else if (count == 200) {
+          count_str = "200+ results";
+        } else {
+          count_str = count + " results";
+        }
+        let result_count = `<div class="is-size-6">${count_str}</div>`;
+
+        search_result_container = `
+              <div class="is-flex is-flex-direction-column gap-2 is-align-items-flex-start">
+                  ${modal_filters}
+                  ${search_divider}
+                  ${result_count}
+                  <div class="is-clipped w-100 is-flex is-flex-direction-column gap-2 is-align-items-flex-start has-text-justified mt-1">
+                    ${search_results}
+                  </div>
+              </div>
+          `;
+      } else {
+        search_result_container = `
             <div class="is-flex is-flex-direction-column gap-2 is-align-items-flex-start">
                 ${modal_filters}
                 ${search_divider}
-                ${result_count}
-                <div class="is-clipped w-100 is-flex is-flex-direction-column gap-2 is-align-items-flex-start has-text-justified mt-1">
-                  ${search_results}
-                </div>
-            </div>
+                <div class="is-size-6">0 result(s)</div>
+              </div>
+              <div class="has-text-centered my-5 py-5">No result found!</div>
         `;
-    } else {
-      search_result_container = `
-           <div class="is-flex is-flex-direction-column gap-2 is-align-items-flex-start">
-               ${modal_filters}
-               ${search_divider}
-               <div class="is-size-6">0 result(s)</div>
-            </div>
-            <div class="has-text-centered my-5 py-5">No result found!</div>
-       `;
-    }
+      }
 
-    if ($(".search-modal-card-body").hasClass("is-justify-content-center")) {
-      $(".search-modal-card-body").removeClass("is-justify-content-center");
-    }
+      if ($(".search-modal-card-body").hasClass("is-justify-content-center")) {
+        $(".search-modal-card-body").removeClass("is-justify-content-center");
+      }
 
-    $(".search-modal-card-body").html(search_result_container);
-  } else {
-    if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) {
-      $(".search-modal-card-body").addClass("is-justify-content-center");
+      $(".search-modal-card-body").html(search_result_container);
+    } else {
+      if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) {
+        $(".search-modal-card-body").addClass("is-justify-content-center");
+      }
+
+      $(".search-modal-card-body").html(`
+        <div class="has-text-centered my-5 py-5">Type something to get started!</div>
+      `);
     }
+  }
 
-    $(".search-modal-card-body").html(`
-      <div class="has-text-centered my-5 py-5">Type something to get started!</div>
-    `);
+  /**
+   * Make the modal filter html
+   *
+   * @returns string
+   */
+  function make_modal_body_filters() {
+    let str = filters
+      .map((val) => {
+        if (selected_filter == val.toLowerCase()) {
+          return `<a href="javascript:;" class="search-filter search-filter-selected"><span>${val}</span></a>`;
+        } else {
+          return `<a href="javascript:;" class="search-filter"><span>${val}</span></a>`;
+        }
+      })
+      .join("");
+
+    return `
+          <div class="is-flex gap-2 is-flex-wrap-wrap is-justify-content-flex-start is-align-items-center search-filters">
+              <span class="is-size-6">Filters:</span>
+              ${str}
+          </div>`;
   }
 }
 
-/**
- * Make the modal filter html
- *
- * @returns string
- */
-function make_modal_body_filters() {
-  let str = filters
-    .map((val) => {
-      if (selected_filter == val.toLowerCase()) {
-        return `<a href="javascript:;" class="search-filter search-filter-selected"><span>${val}</span></a>`;
-      } else {
-        return `<a href="javascript:;" class="search-filter"><span>${val}</span></a>`;
-      }
-    })
-    .join("");
-
-  return `
-        <div class="is-flex gap-2 is-flex-wrap-wrap is-justify-content-flex-start is-align-items-center search-filters">
-            <span class="is-size-6">Filters:</span>
-            ${str}
-        </div>`;
+function waitUntilSearchIndexAvailable() {
+  // This script can execute before documenterSearchIndex has been
+  // defined, so the search setup is deferred until the index exists;
+  // until then, log a warning and check again after one second.
+  if (typeof documenterSearchIndex === "undefined") {
+    console.warn("Search Index not available, waiting");
+    setTimeout(waitUntilSearchIndexAvailable, 1000);
+    return;
+  }
+
+  runSearchMainCode();
+}
 
+// The actual entry point to the search code
+waitUntilSearchIndexAvailable();
+
 })
 ////////////////////////////////////////////////////////////////////////////////
 require(['jquery'], function($) {
diff --git a/dev/benchmarks/index.html b/dev/benchmarks/index.html
index 1fce5ca..033e0f8 100644
--- a/dev/benchmarks/index.html
+++ b/dev/benchmarks/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Benchmarks · AcceleratedKernels.jl</title><meta name="title" content="Benchmarks · AcceleratedKernels.jl"/><meta property="og:title" content="Benchmarks · AcceleratedKernels.jl"/><meta property="twitter:title" content="Benchmarks · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li class="is-active"><a class="tocitem" href>Benchmarks</a><ul class="internal"><li><a class="tocitem" href="#Benchmarks"><span>Benchmarks</span></a></li></ul></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" 
href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Benchmarks</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Benchmarks</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/benchmarks.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Benchmarks"><a class="docs-heading-anchor" href="#Benchmarks">Benchmarks</a><a id="Benchmarks-1"></a><a class="docs-heading-anchor-permalink" href="#Benchmarks" title="Permalink"></a></h2><div class="markdown"><p>Some arithmetic-heavy benchmarks are given below - see <a href="https://github.com/anicusan/AcceleratedKernels-Benchmark">this repository</a> for the code; our paper will be linked here upon publishing with a full analysis.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Benchmarks · AcceleratedKernels.jl</title><meta name="title" content="Benchmarks · AcceleratedKernels.jl"/><meta property="og:title" content="Benchmarks · AcceleratedKernels.jl"/><meta property="twitter:title" content="Benchmarks · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/benchmarks/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li class="is-active"><a class="tocitem" href>Benchmarks</a><ul class="internal"><li><a class="tocitem" href="#Benchmarks"><span>Benchmarks</span></a></li></ul></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" 
href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Benchmarks</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Benchmarks</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/benchmarks.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Benchmarks"><a class="docs-heading-anchor" href="#Benchmarks">Benchmarks</a><a id="Benchmarks-1"></a><a class="docs-heading-anchor-permalink" href="#Benchmarks" title="Permalink"></a></h2><div class="markdown"><p>Some arithmetic-heavy benchmarks are given below - see <a href="https://github.com/anicusan/AcceleratedKernels-Benchmark">this repository</a> for the code; our paper will be linked here upon publishing with a full analysis.</p>
 <p><img src="https://github.com/anicusan/AcceleratedKernels-Benchmark/blob/main/ArithmeticBenchmark/ArithmeticBenchmarkTable.png?raw&#61;true" alt="Arithmetic benchmark" /></p>
 <p>See <code>prototype/sort_benchmark.jl</code> for a small-scale sorting benchmark code and <code>prototype/thrust_sort</code> for the Nvidia Thrust wrapper. The results below are from a system with Linux 6.6.30-2-MANJARO, Intel Core i9-10885H CPU, Nvidia Quadro RTX 4000 with Max-Q Design GPU, Thrust 1.17.1-1, Julia Version 1.10.4.</p>
 <p><img src="https://github.com/juliagpu/AcceleratedKernels.jl/blob/main/docs/src/assets/sort_benchmark.png?raw&#61;true" alt="Sorting benchmark" /></p>
@@ -7,4 +7,4 @@
 <p>The sorting algorithms can also be combined with <a href="https://github.com/anicusan/MPISort.jl"><code>MPISort.jl</code></a> for multi-<em>device</em> sorting - indeed, you can co-operatively sort using <strong>both</strong> your CPU and GPU&#33; Or use 200 GPUs on the 52 nodes of <a href="https://www.baskerville.ac.uk/">Baskerville HPC</a> to sort 538-855 GB of data per second &#40;comparable with the highest figure reported in literature of <a href="http://dx.doi.org/10.1145/2464996.2465442">900 GB/s on 262,144 CPU cores</a>&#41;:</p>
 <p><img src="https://github.com/juliagpu/AcceleratedKernels.jl/blob/main/docs/src/assets/sort_throughput.png?raw&#61;true" alt="Sorting throughput" /></p>
 <p>Hardware stats for nerds <a href="https://docs.baskerville.ac.uk/system/">available here</a>. Full analysis will be linked here once our paper is published.</p>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../">« Overview</a><a class="docs-footer-nextpage" href="../performance/">Performance Tips »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../">« Overview</a><a class="docs-footer-nextpage" href="../performance/">Performance Tips »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/debugging/index.html b/dev/debugging/index.html
index c2db23b..ecdaa1e 100644
--- a/dev/debugging/index.html
+++ b/dev/debugging/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Debugging Kernels · AcceleratedKernels.jl</title><meta name="title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta property="og:title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta property="twitter:title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li class="is-active"><a class="tocitem" href>Debugging Kernels</a><ul class="internal"><li><a class="tocitem" href="#Debugging-Kernels"><span>Debugging Kernels</span></a></li></ul></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Debugging Kernels</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Debugging Kernels</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/debugging.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Debugging-Kernels"><a class="docs-heading-anchor" href="#Debugging-Kernels">Debugging Kernels</a><a id="Debugging-Kernels-1"></a><a class="docs-heading-anchor-permalink" href="#Debugging-Kernels" title="Permalink"></a></h2><p>As the compilation pipeline of GPU kernels is different to that of base Julia, error messages also look different - for example, where Julia would insert an exception when a variable name was not defined (e.g. we had a typo), a GPU kernel throwing exceptions cannot be compiled and instead you&#39;ll see some cascading errors like <code>&quot;[...] compiling [...] resulted in invalid LLVM IR&quot;</code> caused by <code>&quot;Reason: unsupported use of an undefined name&quot;</code> resulting in <code>&quot;Reason: unsupported dynamic function invocation&quot;</code>, etc.</p><p>Thankfully, there are only about 3 types of such error messages and they&#39;re not that scary when you look into them.</p><h3 id="Undefined-Variables-/-Typos"><a class="docs-heading-anchor" href="#Undefined-Variables-/-Typos">Undefined Variables / Typos</a><a id="Undefined-Variables-/-Typos-1"></a><a class="docs-heading-anchor-permalink" href="#Undefined-Variables-/-Typos" title="Permalink"></a></h3><p>If you misspell a variable name, Julia would insert an exception:</p><pre><code class="language-julia hljs">function set_color(v, color)
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Debugging Kernels · AcceleratedKernels.jl</title><meta name="title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta property="og:title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta property="twitter:title" content="Debugging Kernels · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/debugging/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li class="is-active"><a class="tocitem" href>Debugging Kernels</a><ul class="internal"><li><a class="tocitem" href="#Debugging-Kernels"><span>Debugging Kernels</span></a></li></ul></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Debugging Kernels</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Debugging Kernels</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/debugging.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Debugging-Kernels"><a class="docs-heading-anchor" href="#Debugging-Kernels">Debugging Kernels</a><a id="Debugging-Kernels-1"></a><a class="docs-heading-anchor-permalink" href="#Debugging-Kernels" title="Permalink"></a></h2><p>As the compilation pipeline of GPU kernels is different to that of base Julia, error messages also look different - for example, where Julia would insert an exception when a variable name was not defined (e.g. we had a typo), a GPU kernel throwing exceptions cannot be compiled and instead you&#39;ll see some cascading errors like <code>&quot;[...] compiling [...] resulted in invalid LLVM IR&quot;</code> caused by <code>&quot;Reason: unsupported use of an undefined name&quot;</code> resulting in <code>&quot;Reason: unsupported dynamic function invocation&quot;</code>, etc.</p><p>Thankfully, there are only about 3 types of such error messages and they&#39;re not that scary when you look into them.</p><h3 id="Undefined-Variables-/-Typos"><a class="docs-heading-anchor" href="#Undefined-Variables-/-Typos">Undefined Variables / Typos</a><a id="Undefined-Variables-/-Typos-1"></a><a class="docs-heading-anchor-permalink" href="#Undefined-Variables-/-Typos" title="Permalink"></a></h3><p>If you misspell a variable name, Julia would insert an exception:</p><pre><code class="language-julia hljs">function set_color(v, color)
     AK.foreachindex(v) do i
         v[i] = colour           # Grab your porridge
     end
@@ -50,4 +50,4 @@
 mymul!(v, 2.0)</code></pre><p>Note that we try to multiply <code>Float32</code> values by <code>2.0</code>, which is a <code>Float64</code> - in which case we get:</p><pre><code class="language-bash hljs">ERROR: LoadError: Compilation to native code failed; see below for details.
 [...]
 caused by: NSError: Compiler encountered an internal error (AGXMetalG15X_M1, code 3)
-[...]</code></pre><p>Change the <code>2.0</code> to <code>2.0f0</code> or <code>Float32(2)</code>; in kernels with generic types (that are supposed to work on multiple possible input types), do use the same types as your inputs, using e.g. <code>T = eltype(v)</code> then <code>zero(T)</code>, <code>T(42)</code>, etc.</p><hr/><p>For other library-related problems, feel free to post a GitHub issue. For help implementing new code, or just advice, you can also use the <a href="https://discourse.julialang.org/c/domain/gpu/11">Julia Discourse</a> forum, the community is incredibly helpful.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../testing/">« Testing</a><a class="docs-footer-nextpage" href="../roadmap/">Roadmap »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. 
Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+[...]</code></pre><p>Change the <code>2.0</code> to <code>2.0f0</code> or <code>Float32(2)</code>; in kernels with generic types (that are supposed to work on multiple possible input types), do use the same types as your inputs, using e.g. <code>T = eltype(v)</code> then <code>zero(T)</code>, <code>T(42)</code>, etc.</p><hr/><p>For other library-related problems, feel free to post a GitHub issue. For help implementing new code, or just advice, you can also use the <a href="https://discourse.julialang.org/c/domain/gpu/11">Julia Discourse</a> forum, the community is incredibly helpful.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../testing/">« Testing</a><a class="docs-footer-nextpage" href="../roadmap/">Roadmap »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. 
Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/index.html b/dev/index.html
index 83cb239..d88e1fe 100644
--- a/dev/index.html
+++ b/dev/index.html
@@ -1,9 +1,9 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Overview · AcceleratedKernels.jl</title><meta name="title" content="Overview · AcceleratedKernels.jl"/><meta property="og:title" content="Overview · AcceleratedKernels.jl"/><meta property="twitter:title" content="Overview · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/"/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" 
type="text/css" href="assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href><img src="assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Overview</a><ul class="internal"><li><a class="tocitem" href="#What&#39;s-Different?"><span>What&#39;s Different?</span></a></li><li><a class="tocitem" href="#Status"><span>Status</span></a></li><li><a class="tocitem" href="#Acknowledgements"><span>Acknowledgements</span></a></li><li><a class="tocitem" href="#License"><span>License</span></a></li></ul></li><li><a class="tocitem" href="benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="api/foreachindex/">General Loops</a></li><li><a class="tocitem" 
href="api/map/">Map</a></li><li><a class="tocitem" href="api/sort/">Sorting</a></li><li><a class="tocitem" href="api/reduce/">Reduce</a></li><li><a class="tocitem" href="api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="api/predicates/">Predicates</a></li><li><a class="tocitem" href="api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="testing/">Testing</a></li><li><a class="tocitem" href="debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="roadmap/">Roadmap</a></li><li><a class="tocitem" href="references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Overview</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Overview</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/index.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><p><img src="assets/logo.png" alt="Logo"/></p><p>Parallel algorithm building blocks for the Julia ecosystem, targeting multithreaded CPUs, and GPUs via Intel oneAPI, AMD ROCm, Apple Metal and Nvidia CUDA (and any future backends added to the <a href="https://juliagpu.org/">JuliaGPU</a> organisation).</p><hr/><h2 id="What&#39;s-Different?"><a class="docs-heading-anchor" href="#What&#39;s-Different?">What&#39;s Different?</a><a id="What&#39;s-Different?-1"></a><a class="docs-heading-anchor-permalink" href="#What&#39;s-Different?" title="Permalink"></a></h2><div class="markdown"><p>As far as I am aware, this is the first cross-architecture parallel standard library <em>from a unified codebase</em> - that is, the code is written as <a href="https://github.com/JuliaGPU/KernelAbstractions.jl">KernelAbstractions.jl</a> backend-agnostic kernels, which are then <strong>transpiled</strong> to a GPU backend; that means we benefit from all the optimisations available on the native platform and official compiler stacks. For example, unlike open standards like OpenCL that require GPU vendors to implement that API for their hardware, we target the existing official compilers. And while performance-portability libraries like <a href="https://github.com/kokkos/kokkos">Kokkos</a> and <a href="https://github.com/LLNL/RAJA">RAJA</a> are powerful for large C&#43;&#43; codebases, they require US National Lab-level development and maintenance efforts to effectively forward calls from a single API to other OpenMP, CUDA Thrust, ROCm rocThrust, oneAPI DPC&#43;&#43; libraries developed separately. 
In comparison, this library was developed effectively in a week by a single person because developing packages in Julia is just a joy.</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Overview · AcceleratedKernels.jl</title><meta name="title" content="Overview · AcceleratedKernels.jl"/><meta property="og:title" content="Overview · AcceleratedKernels.jl"/><meta property="twitter:title" content="Overview · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/"/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" 
type="text/css" href="assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href><img src="assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Overview</a><ul class="internal"><li><a class="tocitem" href="#What&#39;s-Different?"><span>What&#39;s Different?</span></a></li><li><a class="tocitem" href="#Status"><span>Status</span></a></li><li><a class="tocitem" href="#Acknowledgements"><span>Acknowledgements</span></a></li><li><a class="tocitem" href="#License"><span>License</span></a></li></ul></li><li><a class="tocitem" href="benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="api/foreachindex/">General Loops</a></li><li><a class="tocitem" 
href="api/map/">Map</a></li><li><a class="tocitem" href="api/sort/">Sorting</a></li><li><a class="tocitem" href="api/reduce/">Reduce</a></li><li><a class="tocitem" href="api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="api/predicates/">Predicates</a></li><li><a class="tocitem" href="api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="testing/">Testing</a></li><li><a class="tocitem" href="debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="roadmap/">Roadmap</a></li><li><a class="tocitem" href="references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Overview</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Overview</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/index.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a 
class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><p><img src="assets/logo.png" alt="Logo"/></p><p>Parallel algorithm building blocks for the Julia ecosystem, targeting multithreaded CPUs, and GPUs via Intel oneAPI, AMD ROCm, Apple Metal and Nvidia CUDA (and any future backends added to the <a href="https://juliagpu.org/">JuliaGPU</a> organisation).</p><hr/><h2 id="What&#39;s-Different?"><a class="docs-heading-anchor" href="#What&#39;s-Different?">What&#39;s Different?</a><a id="What&#39;s-Different?-1"></a><a class="docs-heading-anchor-permalink" href="#What&#39;s-Different?" title="Permalink"></a></h2><div class="markdown"><p>As far as I am aware, this is the first cross-architecture parallel standard library <em>from a unified codebase</em> - that is, the code is written as <a href="https://github.com/JuliaGPU/KernelAbstractions.jl">KernelAbstractions.jl</a> backend-agnostic kernels, which are then <strong>transpiled</strong> to a GPU backend; that means we benefit from all the optimisations available on the native platform and official compiler stacks. For example, unlike open standards like OpenCL that require GPU vendors to implement that API for their hardware, we target the existing official compilers. And while performance-portability libraries like <a href="https://github.com/kokkos/kokkos">Kokkos</a> and <a href="https://github.com/LLNL/RAJA">RAJA</a> are powerful for large C&#43;&#43; codebases, they require US National Lab-level development and maintenance efforts to effectively forward calls from a single API to other OpenMP, CUDA Thrust, ROCm rocThrust, oneAPI DPC&#43;&#43; libraries developed separately. 
In comparison, this library was developed effectively in a week by a single person because developing packages in Julia is just a joy.</p>
 <p>Again, this is only possible because of the unique Julia compilation model, the <a href="https://juliagpu.org/">JuliaGPU</a> organisation work for reusable GPU backend infrastructure, and especially the <a href="https://github.com/JuliaGPU/KernelAbstractions.jl">KernelAbstractions.jl</a> backend-agnostic kernel language. Thank you.</p>
 </div><hr/><h2 id="Status"><a class="docs-heading-anchor" href="#Status">Status</a><a id="Status-1"></a><a class="docs-heading-anchor-permalink" href="#Status" title="Permalink"></a></h2><div class="markdown"><p>The AcceleratedKernels.jl sorters were adopted as the official <a href="https://github.com/JuliaGPU/AMDGPU.jl/pull/688">AMDGPU algorithms</a>&#33; The API is starting to stabilise; it follows the Julia standard library fairly closely - and additionally exposes all temporary arrays for memory reuse. For any new ideas / requests, please join the conversation on <a href="https://discourse.julialang.org/t/ann-acceleratedkernels-jl-cross-architecture-parallel-algorithms-for-julias-gpu-backends/119698/16">Julia Discourse</a> or post <a href="https://github.com/juliagpu/AcceleratedKernels.jl/issues">an issue</a>.</p>
 <p>We have an extensive randomised test suite that we run on the CPU &#40;single- and multi-threaded&#41; backend on Windows, Ubuntu and MacOS for Julia LTS, Stable, and Pre-Release, plus the CUDA, AMDGPU, oneAPI and Metal backends on the <a href="https://github.com/JuliaGPU/buildkite">JuliaGPU buildkite</a>.</p>
 <p>AcceleratedKernels.jl is also a fundamental building block of applications developed at <a href="https://evophase.co.uk/">EvoPhase</a>, so it will see continuous heavy use with industry backing. Long-term stability, performance improvements and support are priorities for us.</p>
 </div><hr/><h2 id="Acknowledgements"><a class="docs-heading-anchor" href="#Acknowledgements">Acknowledgements</a><a id="Acknowledgements-1"></a><a class="docs-heading-anchor-permalink" href="#Acknowledgements" title="Permalink"></a></h2><div class="markdown"><p>Designed and built by <a href="https://github.com/anicusan">Andrei-Leonard Nicusan</a>, maintained with <a href="https://github.com/juliagpu/AcceleratedKernels.jl/graphs/contributors">contributors</a>.</p>
 <p>Much of this work was possible because of the fantastic HPC resources at the University of Birmingham and the Birmingham Environment for Academic Research, which gave us free on-demand access to thousands of CPUs and GPUs that we experimented on, and the support teams we nagged. In particular, thank you to Kit Windows-Yule and Andrew Morris on the BlueBEAR and Baskerville T2 supercomputers&#39; leadership, and Simon Branford, Simon Hartley, James Allsopp and James Carpenter for computing support.</p>
-</div><hr/><h2 id="License"><a class="docs-heading-anchor" href="#License">License</a><a id="License-1"></a><a class="docs-heading-anchor-permalink" href="#License" title="Permalink"></a></h2><p>AcceleratedKernels.jl is MIT-licensed. Enjoy.</p></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="benchmarks/">Benchmarks »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div><hr/><h2 id="License"><a class="docs-heading-anchor" href="#License">License</a><a id="License-1"></a><a class="docs-heading-anchor-permalink" href="#License" title="Permalink"></a></h2><p>AcceleratedKernels.jl is MIT-licensed. Enjoy.</p></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="benchmarks/">Benchmarks »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/objects.inv b/dev/objects.inv
index 297934f..ea14ee0 100644
Binary files a/dev/objects.inv and b/dev/objects.inv differ
diff --git a/dev/performance/index.html b/dev/performance/index.html
index 613ef5d..9705419 100644
--- a/dev/performance/index.html
+++ b/dev/performance/index.html
@@ -1,4 +1,4 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Performance Tips · AcceleratedKernels.jl</title><meta name="title" content="Performance Tips · AcceleratedKernels.jl"/><meta property="og:title" content="Performance Tips · AcceleratedKernels.jl"/><meta property="twitter:title" content="Performance Tips · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li class="is-active"><a class="tocitem" href>Performance Tips</a><ul class="internal"><li><a class="tocitem" href="#Performance-Tips"><span>Performance Tips</span></a></li></ul></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" 
href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Performance Tips</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Performance Tips</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/performance.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Performance-Tips"><a class="docs-heading-anchor" href="#Performance-Tips">Performance Tips</a><a id="Performance-Tips-1"></a><a class="docs-heading-anchor-permalink" href="#Performance-Tips" title="Permalink"></a></h2><p>If you just started using <code>AcceleratedKernels.jl</code>, see the Manual first for some examples.</p><h3 id="GPU-Block-Size-and-CPU-Threads"><a class="docs-heading-anchor" href="#GPU-Block-Size-and-CPU-Threads">GPU Block Size and CPU Threads</a><a id="GPU-Block-Size-and-CPU-Threads-1"></a><a class="docs-heading-anchor-permalink" href="#GPU-Block-Size-and-CPU-Threads" title="Permalink"></a></h3><p>All GPU functions allow you to specify a block size - this is often a power of two (mostly 64, 128, 256, 512); the optimum depends on the algorithm, input data and hardware - you can try the different values and <code>@time</code> or <code>@benchmark</code> them:</p><pre><code class="language-julia hljs">@time AK.foreachindex(f, itr_gpu, block_size=512)</code></pre><p>Similarly, for performance on the CPU the overhead of spawning threads should be masked by processing more elements per thread (but there is no reason here to launch more threads than <code>Threads.nthreads()</code>, the number of threads Julia was started with); the optimum depends on how expensive <code>f</code> is - again, benchmarking is your friend:</p><pre><code class="language-julia hljs">@time AK.foreachindex(f, itr_cpu, max_tasks=16, min_elems=1000)</code></pre><h3 id="Temporary-Arrays"><a class="docs-heading-anchor" href="#Temporary-Arrays">Temporary Arrays</a><a id="Temporary-Arrays-1"></a><a class="docs-heading-anchor-permalink" href="#Temporary-Arrays" title="Permalink"></a></h3><p>As GPU memory is more expensive, all functions in AcceleratedKernels.jl expose 
any temporary arrays they will use (the <code>temp</code> argument); you can supply your own buffers to make the algorithms not allocate additional GPU storage, e.g.:</p><pre><code class="language-julia hljs">v = ROCArray(rand(Float32, 100_000))
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Performance Tips · AcceleratedKernels.jl</title><meta name="title" content="Performance Tips · AcceleratedKernels.jl"/><meta property="og:title" content="Performance Tips · AcceleratedKernels.jl"/><meta property="twitter:title" content="Performance Tips · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/performance/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li class="is-active"><a class="tocitem" href>Performance Tips</a><ul class="internal"><li><a class="tocitem" href="#Performance-Tips"><span>Performance Tips</span></a></li></ul></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" 
href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Performance Tips</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Performance Tips</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/performance.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" 
id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Performance-Tips"><a class="docs-heading-anchor" href="#Performance-Tips">Performance Tips</a><a id="Performance-Tips-1"></a><a class="docs-heading-anchor-permalink" href="#Performance-Tips" title="Permalink"></a></h2><p>If you just started using <code>AcceleratedKernels.jl</code>, see the Manual first for some examples.</p><h3 id="GPU-Block-Size-and-CPU-Threads"><a class="docs-heading-anchor" href="#GPU-Block-Size-and-CPU-Threads">GPU Block Size and CPU Threads</a><a id="GPU-Block-Size-and-CPU-Threads-1"></a><a class="docs-heading-anchor-permalink" href="#GPU-Block-Size-and-CPU-Threads" title="Permalink"></a></h3><p>All GPU functions allow you to specify a block size - this is often a power of two (mostly 64, 128, 256, 512); the optimum depends on the algorithm, input data and hardware - you can try the different values and <code>@time</code> or <code>@benchmark</code> them:</p><pre><code class="language-julia hljs">@time AK.foreachindex(f, itr_gpu, block_size=512)</code></pre><p>Similarly, for performance on the CPU the overhead of spawning threads should be masked by processing more elements per thread (but there is no reason here to launch more threads than <code>Threads.nthreads()</code>, the number of threads Julia was started with); the optimum depends on how expensive <code>f</code> is - again, benchmarking is your friend:</p><pre><code class="language-julia hljs">@time AK.foreachindex(f, itr_cpu, max_tasks=16, min_elems=1000)</code></pre><h3 id="Temporary-Arrays"><a class="docs-heading-anchor" href="#Temporary-Arrays">Temporary Arrays</a><a id="Temporary-Arrays-1"></a><a class="docs-heading-anchor-permalink" href="#Temporary-Arrays" title="Permalink"></a></h3><p>As GPU memory 
is more expensive, all functions in AcceleratedKernels.jl expose any temporary arrays they will use (the <code>temp</code> argument); you can supply your own buffers to make the algorithms not allocate additional GPU storage, e.g.:</p><pre><code class="language-julia hljs">v = ROCArray(rand(Float32, 100_000))
 temp = similar(v)
-AK.sort!(v, temp=temp)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../benchmarks/">« Benchmarks</a><a class="docs-footer-nextpage" href="../api/using_backends/">Using Different Backends »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+AK.sort!(v, temp=temp)</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../benchmarks/">« Benchmarks</a><a class="docs-footer-nextpage" href="../api/using_backends/">Using Different Backends »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/references/index.html b/dev/references/index.html
index 0b0c659..e45ebd8 100644
--- a/dev/references/index.html
+++ b/dev/references/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>References · AcceleratedKernels.jl</title><meta name="title" content="References · AcceleratedKernels.jl"/><meta property="og:title" content="References · AcceleratedKernels.jl"/><meta property="twitter:title" content="References · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li class="is-active"><a class="tocitem" href>References</a><ul class="internal"><li><a class="tocitem" href="#References"><span>References</span></a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>References</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>References</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/references.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button 
fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="References"><a class="docs-heading-anchor" href="#References">References</a><a id="References-1"></a><a class="docs-heading-anchor-permalink" href="#References" title="Permalink"></a></h2><div class="markdown"><p>This library is built on the unique Julia infrastructure for transpiling code to GPU backends, and years spent developing the <a href="https://juliagpu.org/">JuliaGPU</a> ecosystem that make it a joy to use. In particular, credit should go to the following people and work:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>References · AcceleratedKernels.jl</title><meta name="title" content="References · AcceleratedKernels.jl"/><meta property="og:title" content="References · AcceleratedKernels.jl"/><meta property="twitter:title" content="References · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/references/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script 
src="../../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" 
href="../api/accumulate/">Accumulate</a></li><li><a class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li class="is-active"><a class="tocitem" href>References</a><ul class="internal"><li><a class="tocitem" href="#References"><span>References</span></a></li></ul></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>References</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>References</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/references.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" 
href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="References"><a class="docs-heading-anchor" href="#References">References</a><a id="References-1"></a><a class="docs-heading-anchor-permalink" href="#References" title="Permalink"></a></h2><div class="markdown"><p>This library is built on the unique Julia infrastructure for transpiling code to GPU backends, and years spent developing the <a href="https://juliagpu.org/">JuliaGPU</a> ecosystem that make it a joy to use. In particular, credit should go to the following people and work:</p>
 <ul>
 <li><p>The Julia language design, which made code manipulation and generation a first class citizen: Bezanson J, Edelman A, Karpinski S, Shah VB. Julia: A fresh approach to numerical computing. SIAM review. 2017.</p>
 </li>
@@ -38,4 +38,4 @@
 </ul>
 </div><hr/><div class="markdown"><p>Designed and built by <a href="https://github.com/anicusan">Andrei-Leonard Nicusan</a>, maintained with <a href="https://github.com/juliagpu/AcceleratedKernels.jl/graphs/contributors">contributors</a>.</p>
 <p>Much of this work was possible because of the fantastic HPC resources at the University of Birmingham and the Birmingham Environment for Academic Research, which gave us free on-demand access to thousands of CPUs and GPUs that we experimented on, and the support teams we nagged. In particular, thank you to Kit Windows-Yule and Andrew Morris on the BlueBEAR and Baskerville T2 supercomputers&#39; leadership, and Simon Branford, Simon Hartley, James Allsopp and James Carpenter for computing support.</p>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../roadmap/">« Roadmap</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../roadmap/">« Roadmap</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/roadmap/index.html b/dev/roadmap/index.html
index 827d405..0715c24 100644
--- a/dev/roadmap/index.html
+++ b/dev/roadmap/index.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Roadmap · AcceleratedKernels.jl</title><meta name="title" content="Roadmap · AcceleratedKernels.jl"/><meta property="og:title" content="Roadmap · AcceleratedKernels.jl"/><meta property="twitter:title" content="Roadmap · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a 
class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li class="is-active"><a class="tocitem" href>Roadmap</a><ul class="internal"><li><a class="tocitem" href="#Roadmap-/-Future-Plans"><span>Roadmap / Future Plans</span></a></li></ul></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Roadmap</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Roadmap</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/roadmap.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" 
id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Roadmap-/-Future-Plans"><a class="docs-heading-anchor" href="#Roadmap-/-Future-Plans">Roadmap / Future Plans</a><a id="Roadmap-/-Future-Plans-1"></a><a class="docs-heading-anchor-permalink" href="#Roadmap-/-Future-Plans" title="Permalink"></a></h2><div class="markdown"><p>Help is very welcome for any of the below:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Roadmap · AcceleratedKernels.jl</title><meta name="title" content="Roadmap · AcceleratedKernels.jl"/><meta property="og:title" content="Roadmap · AcceleratedKernels.jl"/><meta property="twitter:title" content="Roadmap · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/roadmap/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a 
class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li><a class="tocitem" href="../testing/">Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li class="is-active"><a class="tocitem" href>Roadmap</a><ul class="internal"><li><a class="tocitem" href="#Roadmap-/-Future-Plans"><span>Roadmap / Future Plans</span></a></li></ul></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Roadmap</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Roadmap</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/roadmap.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a 
class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h2 id="Roadmap-/-Future-Plans"><a class="docs-heading-anchor" href="#Roadmap-/-Future-Plans">Roadmap / Future Plans</a><a id="Roadmap-/-Future-Plans-1"></a><a class="docs-heading-anchor-permalink" href="#Roadmap-/-Future-Plans" title="Permalink"></a></h2><div class="markdown"><p>Help is very welcome for any of the below:</p>
 <ul>
 <li><p>Automated optimisation / tuning of e.g. <code>block_size</code> for a given input; can be made algorithm-agnostic.</p>
 <ul>
@@ -27,4 +27,4 @@
 <li><p><strong>Other ideas?</strong> Post an issue, or open a discussion on the Julia Discourse.</p>
 </li>
 </ul>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../debugging/">« Debugging Kernels</a><a class="docs-footer-nextpage" href="../references/">References »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../debugging/">« Debugging Kernels</a><a class="docs-footer-nextpage" href="../references/">References »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/search_index.js b/dev/search_index.js
index 8719741..b2b7e53 100644
--- a/dev/search_index.js
+++ b/dev/search_index.js
@@ -1,3 +1,3 @@
 var documenterSearchIndex = {"docs":
-[{"location":"references/#References","page":"References","title":"References","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 10. References\") # hide","category":"page"},{"location":"references/","page":"References","title":"References","text":"","category":"page"},{"location":"references/","page":"References","title":"References","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 11. Acknowledgements\") # hide","category":"page"},{"location":"api/sort/#sort-and-friends","page":"Sorting","title":"sort and friends","text":"","category":"section"},{"location":"api/sort/","page":"Sorting","title":"Sorting","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.4. `sort` and friends\") # hide","category":"page"},{"location":"api/accumulate/#Accumulate-/-Prefix-Sum-/-Scan","page":"Accumulate","title":"Accumulate / Prefix Sum / Scan","text":"","category":"section"},{"location":"api/accumulate/","page":"Accumulate","title":"Accumulate","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.7. 
`accumulate`\") # hide","category":"page"},{"location":"api/task_partition/#Multithreaded-Task-Partitioning","page":"Task Partitioning","title":"Multithreaded Task Partitioning","text":"","category":"section"},{"location":"api/task_partition/","page":"Task Partitioning","title":"Task Partitioning","text":"AcceleratedKernels.TaskPartitioner\nAcceleratedKernels.task_partition","category":"page"},{"location":"api/task_partition/#AcceleratedKernels.TaskPartitioner","page":"Task Partitioning","title":"AcceleratedKernels.TaskPartitioner","text":"struct TaskPartitioner\n\nPartitioning num_elems elements / jobs over maximum max_tasks tasks with minimum min_elems elements per task.\n\nMethods\n\nTaskPartitioner(num_elems, max_tasks=Threads.nthreads(), min_elems=1)\n\nFields\n\nnum_elems::Int64\nmax_tasks::Int64\nmin_elems::Int64\nnum_tasks::Int64\ntask_istarts::Vector{Int64}\n\nExamples\n\nusing AcceleratedKernels: TaskPartitioner\n\n# Divide 10 elements between 4 tasks\ntp = TaskPartitioner(10, 4)\nfor i in 1:tp.num_tasks\n    @show tp[i]\nend\n\n# output\ntp[i] = 1:3\ntp[i] = 4:6\ntp[i] = 7:8\ntp[i] = 9:10\n\nusing AcceleratedKernels: TaskPartitioner\n\n# Divide 20 elements between 6 tasks with minimum 5 elements per task.\n# Not all tasks will be required\ntp = TaskPartitioner(20, 6, 5)\nfor i in 1:tp.num_tasks\n    @show tp[i]\nend\n\n# output\ntp[i] = 1:5\ntp[i] = 6:10\ntp[i] = 11:15\ntp[i] = 16:20\n\n\n\n\n\n","category":"type"},{"location":"api/task_partition/#AcceleratedKernels.task_partition","page":"Task Partitioning","title":"AcceleratedKernels.task_partition","text":"task_partition(f, num_elems, max_tasks=Threads.nthreads(), min_elems=1)\ntask_partition(f, tp::TaskPartitioner)\n\nPartition num_elems jobs across at most num_tasks parallel tasks with at least min_elems per task, calling f(start_index:end_index), where the indices are between 1 and num_elems.\n\nExamples\n\nA toy example showing outputs:\n\nnum_elems = 4\ntask_partition(println, num_elems)\n\n# 
Output, possibly in a different order due to threading order\n1:1\n4:4\n2:2\n3:3\n\nThis function is probably most useful with a do-block, e.g.:\n\ntask_partition(4) do irange\n    some_long_computation(param1, param2, irange)\nend\n\n\n\n\n\n","category":"function"},{"location":"api/foreachindex/#General-Looping","page":"General Loops","title":"General Looping","text":"","category":"section"},{"location":"api/foreachindex/","page":"General Loops","title":"General Loops","text":"AcceleratedKernels.foreachindex","category":"page"},{"location":"api/foreachindex/#AcceleratedKernels.foreachindex","page":"General Loops","title":"AcceleratedKernels.foreachindex","text":"foreachindex(\n    f, itr, backend::Backend=get_backend(itr);\n\n    # CPU settings\n    scheduler=:threads,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size=256,\n)\n\nParallelised for loop over the indices of an iterable.\n\nIt allows you to run normal Julia code on a GPU over multiple arrays - e.g. CuArray, ROCArray, MtlArray, oneArray - with one GPU thread per index.\n\nOn CPUs at most max_tasks threads are launched, or fewer such that each thread processes at least min_elems indices; if a single task ends up being needed, f is inlined and no thread is launched. Tune it to your function - the more expensive it is, the fewer elements are needed to amortise the cost of launching a thread (which is a few μs). 
The scheduler can be :polyester to use Polyester.jl cheap threads or :threads to use normal Julia threads; either can be faster depending on the function, but in general the latter is more composable.\n\nExamples\n\nNormally you would write a for loop like this:\n\nx = Array(1:100)\ny = similar(x)\nfor i in eachindex(x)\n    @inbounds y[i] = 2 * x[i] + 1\nend\n\nUsing this function you can have the same for loop body over a GPU array:\n\nusing CUDA\nconst x = CuArray(1:100)\nconst y = similar(x)\nforeachindex(x) do i\n    @inbounds y[i] = 2 * x[i] + 1\nend\n\nNote that the above code is pure arithmetic, which you can write directly (and on some platforms it may be faster) as:\n\nusing CUDA\nx = CuArray(1:100)\ny = 2 .* x .+ 1\n\nImportant note: to use this function on a GPU, the objects referenced inside the loop body must have known types - i.e. be inside a function, or const global objects; but you shouldn't use global objects anyways. For example:\n\nusing oneAPI\n\nx = oneArray(1:100)\n\n# CRASHES - typical error message: \"Reason: unsupported dynamic function invocation\"\n# foreachindex(x) do i\n#     x[i] = i\n# end\n\nfunction somecopy!(v)\n    # Because it is inside a function, the type of `v` will be known\n    foreachindex(v) do i\n        v[i] = i\n    end\nend\n\nsomecopy!(x)    # This works\n\n\n\n\n\n","category":"function"},{"location":"api/map/#Map","page":"Map","title":"Map","text":"","category":"section"},{"location":"api/map/","page":"Map","title":"Map","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.3. 
`map`\") # hide","category":"page"},{"location":"api/map/","page":"Map","title":"Map","text":"","category":"page"},{"location":"api/map/","page":"Map","title":"Map","text":"AcceleratedKernels.map!","category":"page"},{"location":"api/map/#AcceleratedKernels.map!","page":"Map","title":"AcceleratedKernels.map!","text":"map!(\n    f, dst::AbstractArray, src::AbstractArray;\n\n    # CPU settings\n    scheduler=:threads,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size=256,    \n)\n\nApply the function f to each element of src and store the result in dst. The CPU and GPU settings are the same as for foreachindex.\n\n\n\n\n\n","category":"function"},{"location":"api/binarysearch/#Binary-Search","page":"Binary Search","title":"Binary Search","text":"","category":"section"},{"location":"api/binarysearch/","page":"Binary Search","title":"Binary Search","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.8. `searchsorted` and friends\") # hide","category":"page"},{"location":"benchmarks/#Benchmarks","page":"Benchmarks","title":"Benchmarks","text":"","category":"section"},{"location":"benchmarks/","page":"Benchmarks","title":"Benchmarks","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 3. 
Benchmarks\") # hide","category":"page"},{"location":"performance/#Performance-Tips","page":"Performance Tips","title":"Performance Tips","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"If you just started using AcceleratedKernels.jl, see the Manual first for some examples.","category":"page"},{"location":"performance/#GPU-Block-Size-and-CPU-Threads","page":"Performance Tips","title":"GPU Block Size and CPU Threads","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"All GPU functions allow you to specify a block size - this is often a power of two (mostly 64, 128, 256, 512); the optimum depends on the algorithm, input data and hardware - you can try the different values and @time or @benchmark them:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"@time AK.foreachindex(f, itr_gpu, block_size=512)","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"Similarly, for performance on the CPU the overhead of spawning threads should be masked by processing more elements per thread (but there is no reason here to launch more threads than Threads.nthreads(), the number of threads Julia was started with); the optimum depends on how expensive f is - again, benchmarking is your friend:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"@time AK.foreachindex(f, itr_cpu, max_tasks=16, min_elems=1000)","category":"page"},{"location":"performance/#Temporary-Arrays","page":"Performance Tips","title":"Temporary Arrays","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"As GPU memory is more expensive, all functions in AcceleratedKernels.jl expose any temporary arrays they will use (the temp argument); you can 
supply your own buffers to make the algorithms not allocate additional GPU storage, e.g.:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"v = ROCArray(rand(Float32, 100_000))\ntemp = similar(v)\nAK.sort!(v, temp=temp)","category":"page"},{"location":"api/custom_structs/#Custom-Structs","page":"Custom Structs","title":"Custom Structs","text":"","category":"section"},{"location":"api/custom_structs/","page":"Custom Structs","title":"Custom Structs","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 6. Custom Structs\") # hide","category":"page"},{"location":"roadmap/#Roadmap-/-Future-Plans","page":"Roadmap","title":"Roadmap / Future Plans","text":"","category":"section"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 9. Roadmap / Future Plans\") # hide","category":"page"},{"location":"debugging/#Debugging-Kernels","page":"Debugging Kernels","title":"Debugging Kernels","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"As the compilation pipeline of GPU kernels is different to that of base Julia, error messages also look different - for example, where Julia would insert an exception when a variable name was not defined (e.g. we had a typo), a GPU kernel throwing exceptions cannot be compiled and instead you'll see some cascading errors like \"[...] compiling [...] 
resulted in invalid LLVM IR\" caused by \"Reason: unsupported use of an undefined name\" resulting in \"Reason: unsupported dynamic function invocation\", etc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Thankfully, there are only about 3 types of such error messages and they're not that scary when you look into them.","category":"page"},{"location":"debugging/#Undefined-Variables-/-Typos","page":"Debugging Kernels","title":"Undefined Variables / Typos","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"If you misspell a variable name, Julia would insert an exception:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function set_color(v, color)\n    AK.foreachindex(v) do i\n        v[i] = colour           # Grab your porridge\n    end\nend","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"However, exceptions cannot be compiled on GPUs and you will see cascading errors like below:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Undefined Name Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"The key thing to look for is undefined name, then search for it in your code.","category":"page"},{"location":"debugging/#Exceptions-and-Checks-that-throw","page":"Debugging Kernels","title":"Exceptions and Checks that throw","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"As mentioned above, exceptions cannot be compiled in GPU kernels; however, many normal-looking functions that we reference in kernels may contain argument-checking. 
If it cannot be proved that a check branch would not throw an exception, you will see a similar cascade of errors. For example, casting a Float32 to an Int32 includes an InexactError exception check - see this tame-looking code:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v)\n    AK.foreachindex(v) do i\n        v[i] *= 2f0\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"See any problem with it? The MtlArray(1:1000) creates a GPU vector filled with Int64 values, but within foreachindex we do v[i] *= 2.0. We are multiplying an Int64 by a Float32, resulting in a Float32 value that we try to write back into v - this may throw an exception, like in normal Julia code:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"julia> x = [1, 2, 3];\njulia> x[1] = 42.5\nERROR: InexactError: Int64(42.5)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"On GPUs you will see an error like this:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Check Exception Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note the error stack: setindex!, convert, Int64, box_float32 - because of the exception check, we have a type instability, which in turn results in boxing values behind pointers, in turn resulting in dynamic memory allocation and finally the error we see at the top, unsupported call to gpu_malloc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"You may need to do your correctness checks manually, without exceptions; in this specific case, if we did want to cast a Float32 to an Int, we could 
use unsafe_trunc(T, x) - though be careful when using unsafe functions that you understand their behaviour and assumptions (e.g. log has a DomainError check for negative values):","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v)\n    AK.foreachindex(v) do i\n        v[i] = unsafe_trunc(eltype(v), v[i] * 2.5f0)\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v)","category":"page"},{"location":"debugging/#Type-Instability-/-Global-Variables","page":"Debugging Kernels","title":"Type Instability / Global Variables","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Types must be known to be captured and compiled within GPU kernels. Global variables without const are not type-stable, as you could associate a different value later on in a script:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"v = MtlArray(1:1000)\n\nAK.foreachindex(v) do i\n    v[i] *= 2\nend\n\nv = \"potato\"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"The error stack is a bit more difficult here:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Type Unstable Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"You see a few dynamic function invocation, an unsupported call to gpu_malloc, and a bit further down a box. The more operations you do on the type-unstable object, the more dynamic function invocation errors you'll see. These would also be the steps Base Julia would take to allow dynamically-changing objects: they'd be put in a Box behind pointers, and allocated on the heap. 
In a way, it is better that we cannot do that on a GPU, as it hurts performance massively.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"There are two ways to solve this - if you really want to use global variables in a script, put them behind a const:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"const v = MtlArray(1:1000)\n\nAK.foreachindex(v) do i\n    v[i] *= 2\nend\n\n# This would give you an error now\n# v = \"potato\"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Or better, use functions:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v, x)\n    AK.foreachindex(v) do i\n        v[i] *= x\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v, 2)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note that Julia's lambda capture is very powerful - inside AK.foreachindex you can references other objects from within the function (like x), without explicitly passing them to the GPU.","category":"page"},{"location":"debugging/#Apple-Metal-Only:-Float64-is-not-Supported","page":"Debugging Kernels","title":"Apple Metal Only: Float64 is not Supported","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Mac GPUs do not natively support Float64 values; there is a high-level check when trying to create an array:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"julia> x = MtlArray([1.0, 2.0, 3.0])\nERROR: Metal does not support Float64 values, try using Float32 instead","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"However, if we tried to use / convert values in a kernel to a 
Float64:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v, x)\n    AK.foreachindex(v) do i\n        v[i] *= x\n    end\nend\n\nv = MtlArray{Float32}(1:1000)\nmymul!(v, 2.0)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note that we try to multiply Float32 values by 2.0, which is a Float64 - in which case we get:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"ERROR: LoadError: Compilation to native code failed; see below for details.\n[...]\ncaused by: NSError: Compiler encountered an internal error (AGXMetalG15X_M1, code 3)\n[...]","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Change the 2.0 to 2.0f0 or Float32(2); in kernels with generic types (that are supposed to work on multiple possible input types), do use the same types as your inputs, using e.g. T = eltype(v) then zero(T), T(42), etc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"For other library-related problems, feel free to post a GitHub issue. For help implementing new code, or just advice, you can also use the Julia Discourse forum, the community is incredibly helpful.","category":"page"},{"location":"api/predicates/#Predicates","page":"Predicates","title":"Predicates","text":"","category":"section"},{"location":"api/predicates/","page":"Predicates","title":"Predicates","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.9. 
`all` / `any`\") # hide","category":"page"},{"location":"api/predicates/","page":"Predicates","title":"Predicates","text":"Note on the cooperative keyword: some older platforms crash when multiple threads write to the same memory location in a global array (e.g. old Intel Graphics); if all threads were to write the same value, it is well-defined on others (e.g. CUDA F4.2 says \"If a non-atomic instruction executed by a warp writes to the same location in global memory for more than one of the threads of the warp, only one thread performs a write and which thread does it is undefined.\"). This \"cooperative\" thread behaviour allows for a faster implementation; if you have a platform - the only one I know is Intel UHD Graphics - that crashes, set cooperative=false to use a safer mapreduce-based implementation.","category":"page"},{"location":"testing/#Testing","page":"Testing","title":"Testing","text":"","category":"section"},{"location":"testing/","page":"Testing","title":"Testing","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 7. Testing\") # hide","category":"page"},{"location":"api/mapreduce/#MapReduce","page":"MapReduce","title":"MapReduce","text":"","category":"section"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.6. 
`mapreduce`\") # hide","category":"page"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"","category":"page"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"AcceleratedKernels.mapreduce","category":"page"},{"location":"api/mapreduce/#AcceleratedKernels.mapreduce","page":"MapReduce","title":"AcceleratedKernels.mapreduce","text":"mapreduce(\n    f, op, src::AbstractArray;\n    init,\n    dims::Union{Nothing, Int}=nothing,\n\n    # CPU settings\n    scheduler=:static,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size::Int=256,\n    temp::Union{Nothing, AbstractArray}=nothing,\n    switch_below::Int=0,\n)\n\nReduce src along dimensions dims using the binary operator op after applying f elementwise. If dims is nothing, reduce src to a scalar. If dims is an integer, reduce src along that dimension. The init value is used as the initial value for the reduction (i.e. after mapping).\n\nCPU settings\n\nThe scheduler can be one of the OhMyThreads.jl schedulers, i.e. :static, :dynamic, :greedy or :serial. Assuming the workload is uniform (as the GPU algorithm prefers), :static is used by default; if you need fine-grained control over your threads, consider using OhMyThreads.jl directly.\n\nUse at most max_tasks threads with at least min_elems elements per task.\n\nGPU settings\n\nThe block_size parameter controls the number of threads per block.\n\nThe temp parameter can be used to pass a pre-allocated temporary array. For reduction to a scalar (dims=nothing), length(temp) >= 2 * (length(src) + 2 * block_size - 1) ÷ (2 * block_size) is required. For reduction along a dimension (dims is an integer), temp is used as the destination array, and thus must have the exact dimensions required - i.e. 
same dimensionwise sizes as src, except for the reduced dimension which becomes 1; there are some corner cases when one dimension is zero, check against Base.reduce for CPU arrays for exact behavior.\n\nThe switch_below parameter controls the threshold below which the reduction is performed on the CPU and is only used for 1D reductions (i.e. dims=nothing).\n\nExample\n\nComputing a sum of squares, reducing down to a scalar that is copied to host:\n\nimport AcceleratedKernels as AK\nusing CUDA\n\nv = CuArray{Int16}(rand(1:1000, 100_000))\nvsumsq = AK.mapreduce(x -> x * x, (x, y) -> x + y, v; init=zero(eltype(v)))\n\nComputing dimensionwise sums of squares in a 2D matrix:\n\nimport AcceleratedKernels as AK\nusing Metal\n\nf(x) = x * x\nm = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))\nmrowsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=1)\nmcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2)\n\n\n\n\n\n","category":"function"},{"location":"api/using_backends/#Using-Different-Backends","page":"Using Different Backends","title":"Using Different Backends","text":"","category":"section"},{"location":"api/using_backends/","page":"Using Different Backends","title":"Using Different Backends","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.1. 
Using Different Backends\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"(Image: Logo)","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"Parallel algorithm building blocks for the Julia ecosystem, targeting multithreaded CPUs, and GPUs via Intel oneAPI, AMD ROCm, Apple Metal and Nvidia CUDA (and any future backends added to the JuliaGPU organisation).","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#What's-Different?","page":"Overview","title":"What's Different?","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 1. What's Different?\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#Status","page":"Overview","title":"Status","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 2. Status\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#Acknowledgements","page":"Overview","title":"Acknowledgements","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 11. Acknowledgements\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#License","page":"Overview","title":"License","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"AcceleratedKernels.jl is MIT-licensed. 
Enjoy.","category":"page"},{"location":"api/reduce/#Reductions","page":"Reduce","title":"Reductions","text":"","category":"section"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.5. `reduce`\") # hide","category":"page"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"","category":"page"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"AcceleratedKernels.reduce","category":"page"},{"location":"api/reduce/#AcceleratedKernels.reduce","page":"Reduce","title":"AcceleratedKernels.reduce","text":"reduce(\n    op, src::AbstractArray;\n    init,\n    dims::Union{Nothing, Int}=nothing,\n\n    # CPU settings\n    scheduler=:static,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size::Int=256,\n    temp::Union{Nothing, AbstractGPUArray}=nothing,\n    switch_below::Int=0,\n)\n\nReduce src along dimensions dims using the binary operator op. If dims is nothing, reduce src to a scalar. If dims is an integer, reduce src along that dimension. The init value is used as the initial value for the reduction.\n\nCPU settings\n\nThe scheduler can be one of the OhMyThreads.jl schedulers, i.e. :static, :dynamic, :greedy or :serial. Assuming the workload is uniform (as the GPU algorithm prefers), :static is used by default; if you need fine-grained control over your threads, consider using OhMyThreads.jl directly.\n\nUse at most max_tasks threads with at least min_elems elements per task.\n\nGPU settings\n\nThe block_size parameter controls the number of threads per block.\n\nThe temp parameter can be used to pass a pre-allocated temporary array. For reduction to a scalar (dims=nothing), length(temp) >= 2 * (length(src) + 2 * block_size - 1) ÷ (2 * block_size) is required. For reduction along a dimension (dims is an integer), temp is used as the destination array, and thus must have the exact dimensions required - i.e. 
same dimensionwise sizes as src, except for the reduced dimension which becomes 1; there are some corner cases when one dimension is zero, check against Base.reduce for CPU arrays for exact behavior.\n\nThe switch_below parameter controls the threshold below which the reduction is performed on the CPU and is only used for 1D reductions (i.e. dims=nothing).\n\nExample\n\nComputing a sum, reducing down to a scalar that is copied to host:\n\nimport AcceleratedKernels as AK\nusing CUDA\n\nv = CuArray{Int16}(rand(1:1000, 100_000))\nvsum = AK.reduce((x, y) -> x + y, v; init=zero(eltype(v)))\n\nComputing dimensionwise sums in a 2D matrix:\n\nimport AcceleratedKernels as AK\nusing Metal\n\nm = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))\nmrowsum = AK.reduce(+, m; init=zero(eltype(m)), dims=1)\nmcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2)\n\n\n\n\n\n","category":"function"}]
+[{"location":"references/#References","page":"References","title":"References","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 10. References\") # hide","category":"page"},{"location":"references/","page":"References","title":"References","text":"","category":"page"},{"location":"references/","page":"References","title":"References","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 11. Acknowledgements\") # hide","category":"page"},{"location":"api/sort/#sort-and-friends","page":"Sorting","title":"sort and friends","text":"","category":"section"},{"location":"api/sort/","page":"Sorting","title":"Sorting","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.4. `sort` and friends\") # hide","category":"page"},{"location":"api/accumulate/#Accumulate-/-Prefix-Sum-/-Scan","page":"Accumulate","title":"Accumulate / Prefix Sum / Scan","text":"","category":"section"},{"location":"api/accumulate/","page":"Accumulate","title":"Accumulate","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.7. 
`accumulate`\") # hide","category":"page"},{"location":"api/task_partition/#Multithreaded-Task-Partitioning","page":"Task Partitioning","title":"Multithreaded Task Partitioning","text":"","category":"section"},{"location":"api/task_partition/","page":"Task Partitioning","title":"Task Partitioning","text":"AcceleratedKernels.TaskPartitioner\nAcceleratedKernels.task_partition","category":"page"},{"location":"api/task_partition/#AcceleratedKernels.TaskPartitioner","page":"Task Partitioning","title":"AcceleratedKernels.TaskPartitioner","text":"struct TaskPartitioner\n\nPartitioning num_elems elements / jobs over maximum max_tasks tasks with minimum min_elems elements per task.\n\nMethods\n\nTaskPartitioner(num_elems, max_tasks=Threads.nthreads(), min_elems=1)\n\nFields\n\nnum_elems::Int64\nmax_tasks::Int64\nmin_elems::Int64\nnum_tasks::Int64\ntask_istarts::Vector{Int64}\n\nExamples\n\nusing AcceleratedKernels: TaskPartitioner\n\n# Divide 10 elements between 4 tasks\ntp = TaskPartitioner(10, 4)\nfor i in 1:tp.num_tasks\n    @show tp[i]\nend\n\n# output\ntp[i] = 1:3\ntp[i] = 4:6\ntp[i] = 7:8\ntp[i] = 9:10\n\nusing AcceleratedKernels: TaskPartitioner\n\n# Divide 20 elements between 6 tasks with minimum 5 elements per task.\n# Not all tasks will be required\ntp = TaskPartitioner(20, 6, 5)\nfor i in 1:tp.num_tasks\n    @show tp[i]\nend\n\n# output\ntp[i] = 1:5\ntp[i] = 6:10\ntp[i] = 11:15\ntp[i] = 16:20\n\n\n\n\n\n","category":"type"},{"location":"api/task_partition/#AcceleratedKernels.task_partition","page":"Task Partitioning","title":"AcceleratedKernels.task_partition","text":"task_partition(f, num_elems, max_tasks=Threads.nthreads(), min_elems=1)\ntask_partition(f, tp::TaskPartitioner)\n\nPartition num_elems jobs across at most num_tasks parallel tasks with at least min_elems per task, calling f(start_index:end_index), where the indices are between 1 and num_elems.\n\nExamples\n\nA toy example showing outputs:\n\nnum_elems = 4\ntask_partition(println, num_elems)\n\n# 
Output, possibly in a different order due to threading order\n1:1\n4:4\n2:2\n3:3\n\nThis function is probably most useful with a do-block, e.g.:\n\ntask_partition(4) do irange\n    some_long_computation(param1, param2, irange)\nend\n\n\n\n\n\n","category":"function"},{"location":"api/foreachindex/#General-Looping","page":"General Loops","title":"General Looping","text":"","category":"section"},{"location":"api/foreachindex/","page":"General Loops","title":"General Loops","text":"AcceleratedKernels.foreachindex\nAcceleratedKernels.foraxes","category":"page"},{"location":"api/foreachindex/#AcceleratedKernels.foreachindex","page":"General Loops","title":"AcceleratedKernels.foreachindex","text":"foreachindex(\n    f, itr, backend::Backend=get_backend(itr);\n\n    # CPU settings\n    scheduler=:threads,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size=256,\n)\n\nParallelised for loop over the indices of an iterable.\n\nIt allows you to run normal Julia code on a GPU over multiple arrays - e.g. CuArray, ROCArray, MtlArray, oneArray - with one GPU thread per index.\n\nOn CPUs at most max_tasks threads are launched, or fewer such that each thread processes at least min_elems indices; if a single task ends up being needed, f is inlined and no thread is launched. Tune it to your function - the more expensive it is, the fewer elements are needed to amortise the cost of launching a thread (which is a few μs). 
The scheduler can be :polyester to use Polyester.jl cheap threads or :threads to use normal Julia threads; either can be faster depending on the function, but in general the latter is more composable.\n\nExamples\n\nNormally you would write a for loop like this:\n\nx = Array(1:100)\ny = similar(x)\nfor i in eachindex(x)\n    @inbounds y[i] = 2 * x[i] + 1\nend\n\nUsing this function you can have the same for loop body over a GPU array:\n\nusing CUDA\nimport AcceleratedKernels as AK\nconst x = CuArray(1:100)\nconst y = similar(x)\nAK.foreachindex(x) do i\n    @inbounds y[i] = 2 * x[i] + 1\nend\n\nNote that the above code is pure arithmetic, which you can write directly (and on some platforms it may be faster) as:\n\nusing CUDA\nx = CuArray(1:100)\ny = 2 .* x .+ 1\n\nImportant note: to use this function on a GPU, the objects referenced inside the loop body must have known types - i.e. be inside a function, or const global objects; but you shouldn't use global objects anyways. For example:\n\nusing oneAPI\nimport AcceleratedKernels as AK\n\nx = oneArray(1:100)\n\n# CRASHES - typical error message: \"Reason: unsupported dynamic function invocation\"\n# AK.foreachindex(x) do i\n#     x[i] = i\n# end\n\nfunction somecopy!(v)\n    # Because it is inside a function, the type of `v` will be known\n    AK.foreachindex(v) do i\n        v[i] = i\n    end\nend\n\nsomecopy!(x)    # This works\n\n\n\n\n\n","category":"function"},{"location":"api/foreachindex/#AcceleratedKernels.foraxes","page":"General Loops","title":"AcceleratedKernels.foraxes","text":"foraxes(\n    f, itr, dims::Union{Nothing, <:Integer}=nothing, backend::Backend=get_backend(itr);\n\n    # CPU settings\n    scheduler=:threads,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size=256,\n)\n\nParallelised for loop over the indices along axis dims of an iterable.\n\nIt allows you to run normal Julia code on a GPU over multiple arrays - e.g. 
CuArray, ROCArray, MtlArray, oneArray - with one GPU thread per index.\n\nOn CPUs at most max_tasks threads are launched, or fewer such that each thread processes at least min_elems indices; if a single task ends up being needed, f is inlined and no thread is launched. Tune it to your function - the more expensive it is, the fewer elements are needed to amortise the cost of launching a thread (which is a few μs). The scheduler can be :polyester to use Polyester.jl cheap threads or :threads to use normal Julia threads; either can be faster depending on the function, but in general the latter is more composable.\n\nExamples\n\nNormally you would write a for loop like this:\n\nx = Array(reshape(1:30, 3, 10))\ny = similar(x)\nfor i in axes(x, 2)\n    for j in axes(x, 1)\n        @inbounds y[j, i] = 2 * x[j, i] + 1\n    end\nend\n\nUsing this function you can have the same for loop body over a GPU array:\n\nusing CUDA\nimport AcceleratedKernels as AK\nconst x = CuArray(reshape(1:3000, 3, 1000))\nconst y = similar(x)\nAK.foraxes(x, 2) do i\n    for j in axes(x, 1)\n        @inbounds y[j, i] = 2 * x[j, i] + 1\n    end\nend\n\nImportant note: to use this function on a GPU, the objects referenced inside the loop body must have known types - i.e. be inside a function, or const global objects; but you shouldn't use global objects anyways. 
For example:\n\nusing oneAPI\nimport AcceleratedKernels as AK\n\nx = oneArray(reshape(1:3000, 3, 1000))\n\n# CRASHES - typical error message: \"Reason: unsupported dynamic function invocation\"\n# AK.foraxes(x) do i\n#     x[i] = i\n# end\n\nfunction somecopy!(v)\n    # Because it is inside a function, the type of `v` will be known\n    AK.foraxes(v) do i\n        v[i] = i\n    end\nend\n\nsomecopy!(x)    # This works\n\n\n\n\n\n","category":"function"},{"location":"api/map/#Map","page":"Map","title":"Map","text":"","category":"section"},{"location":"api/map/","page":"Map","title":"Map","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.3. `map`\") # hide","category":"page"},{"location":"api/map/","page":"Map","title":"Map","text":"","category":"page"},{"location":"api/map/","page":"Map","title":"Map","text":"AcceleratedKernels.map!","category":"page"},{"location":"api/map/#AcceleratedKernels.map!","page":"Map","title":"AcceleratedKernels.map!","text":"map!(\n    f, dst::AbstractArray, src::AbstractArray;\n\n    # CPU settings\n    scheduler=:threads,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size=256,    \n)\n\nApply the function f to each element of src and store the result in dst. The CPU and GPU settings are the same as for foreachindex.\n\n\n\n\n\n","category":"function"},{"location":"api/binarysearch/#Binary-Search","page":"Binary Search","title":"Binary Search","text":"","category":"section"},{"location":"api/binarysearch/","page":"Binary Search","title":"Binary Search","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.8. `searchsorted` and friends\") # hide","category":"page"},{"location":"benchmarks/#Benchmarks","page":"Benchmarks","title":"Benchmarks","text":"","category":"section"},{"location":"benchmarks/","page":"Benchmarks","title":"Benchmarks","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 3. 
Benchmarks\") # hide","category":"page"},{"location":"performance/#Performance-Tips","page":"Performance Tips","title":"Performance Tips","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"If you just started using AcceleratedKernels.jl, see the Manual first for some examples.","category":"page"},{"location":"performance/#GPU-Block-Size-and-CPU-Threads","page":"Performance Tips","title":"GPU Block Size and CPU Threads","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"All GPU functions allow you to specify a block size - this is often a power of two (mostly 64, 128, 256, 512); the optimum depends on the algorithm, input data and hardware - you can try the different values and @time or @benchmark them:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"@time AK.foreachindex(f, itr_gpu, block_size=512)","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"Similarly, for performance on the CPU the overhead of spawning threads should be masked by processing more elements per thread (but there is no reason here to launch more threads than Threads.nthreads(), the number of threads Julia was started with); the optimum depends on how expensive f is - again, benchmarking is your friend:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"@time AK.foreachindex(f, itr_cpu, max_tasks=16, min_elems=1000)","category":"page"},{"location":"performance/#Temporary-Arrays","page":"Performance Tips","title":"Temporary Arrays","text":"","category":"section"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"As GPU memory is more expensive, all functions in AcceleratedKernels.jl expose any temporary arrays they will use (the temp argument); you can 
supply your own buffers to make the algorithms not allocate additional GPU storage, e.g.:","category":"page"},{"location":"performance/","page":"Performance Tips","title":"Performance Tips","text":"v = ROCArray(rand(Float32, 100_000))\ntemp = similar(v)\nAK.sort!(v, temp=temp)","category":"page"},{"location":"api/utilities/#Utilities","page":"Utilities","title":"Utilities","text":"","category":"section"},{"location":"api/utilities/","page":"Utilities","title":"Utilities","text":"AcceleratedKernels.TypeWrap","category":"page"},{"location":"api/utilities/#AcceleratedKernels.TypeWrap","page":"Utilities","title":"AcceleratedKernels.TypeWrap","text":"struct TypeWrap{T} end\nTypeWrap(T) = TypeWrap{T}()\nBase.:*(x::Number, ::TypeWrap{T}) where T = T(x)\n\nAllow type conversion via multiplication, like 5i32 for 5 * i32 where i32 is a TypeWrap.\n\nExamples\n\nimport AcceleratedKernels as AK\nu32 = AK.TypeWrap{UInt32}\nprintln(typeof(5u32))\n\n# output\nUInt32\n\nThis is used e.g. to set integer literals inside kernels as u16 to ensure no indices are promoted beyond the index base type.\n\nFor example, Metal uses UInt32 indices, but if it is mixed with a Julia integer literal (Int64 by default) like in src[ithread + 1], we incur a type cast to Int64. Instead, we can use src[ithread + 1u16] or src[ithread + 0x1] to ensure the index is UInt32 and avoid the cast; as the integer literal 1u16 has a shorter type than ithread, it is automatically promoted (at compile time) to the ithread type, whether ithread is signed or unsigned as per the backend.\n\n# Defaults defined\n1u8, 2u16, 3u32, 4u64\n5i8, 6i16, 7i32, 8i64\n\n\n\n\n\n","category":"type"},{"location":"api/custom_structs/#Custom-Structs","page":"Custom Structs","title":"Custom Structs","text":"","category":"section"},{"location":"api/custom_structs/","page":"Custom Structs","title":"Custom Structs","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 6. 
Custom Structs\") # hide","category":"page"},{"location":"roadmap/#Roadmap-/-Future-Plans","page":"Roadmap","title":"Roadmap / Future Plans","text":"","category":"section"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 9. Roadmap / Future Plans\") # hide","category":"page"},{"location":"debugging/#Debugging-Kernels","page":"Debugging Kernels","title":"Debugging Kernels","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"As the compilation pipeline of GPU kernels is different to that of base Julia, error messages also look different - for example, where Julia would insert an exception when a variable name was not defined (e.g. we had a typo), a GPU kernel throwing exceptions cannot be compiled and instead you'll see some cascading errors like \"[...] compiling [...] resulted in invalid LLVM IR\" caused by \"Reason: unsupported use of an undefined name\" resulting in \"Reason: unsupported dynamic function invocation\", etc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Thankfully, there are only about 3 types of such error messages and they're not that scary when you look into them.","category":"page"},{"location":"debugging/#Undefined-Variables-/-Typos","page":"Debugging Kernels","title":"Undefined Variables / Typos","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"If you misspell a variable name, Julia would insert an exception:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function set_color(v, color)\n    AK.foreachindex(v) do i\n        v[i] = colour           # Grab your porridge\n    end\nend","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"However, 
exceptions cannot be compiled on GPUs and you will see cascading errors like below:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Undefined Name Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"The key thing to look for is undefined name, then search for it in your code.","category":"page"},{"location":"debugging/#Exceptions-and-Checks-that-throw","page":"Debugging Kernels","title":"Exceptions and Checks that throw","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"As mentioned above, exceptions cannot be compiled in GPU kernels; however, many normal-looking functions that we reference in kernels may contain argument-checking. If it cannot be proved that a check branch would not throw an exception, you will see a similar cascade of errors. For example, casting a Float32 to an Int32 includes an InexactError exception check - see this tame-looking code:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v)\n    AK.foreachindex(v) do i\n        v[i] *= 2f0\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"See any problem with it? The MtlArray(1:1000) creates a GPU vector filled with Int64 values, but within foreachindex we do v[i] *= 2.0. 
We are multiplying an Int64 by a Float32, resulting in a Float32 value that we try to write back into v - this may throw an exception, like in normal Julia code:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"julia> x = [1, 2, 3];\njulia> x[1] = 42.5\nERROR: InexactError: Int64(42.5)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"On GPUs you will see an error like this:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Check Exception Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note the error stack: setindex!, convert, Int64, box_float32 - because of the exception check, we have a type instability, which in turn results in boxing values behind pointers, in turn resulting in dynamic memory allocation and finally the error we see at the top, unsupported call to gpu_malloc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"You may need to do your correctness checks manually, without exceptions; in this specific case, if we did want to cast a Float32 to an Int, we could use unsafe_trunc(T, x) - though be careful when using unsafe functions that you understand their behaviour and assumptions (e.g. 
log has a DomainError check for negative values):","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v)\n    AK.foreachindex(v) do i\n        v[i] = unsafe_trunc(eltype(v), v[i] * 2.5f0)\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v)","category":"page"},{"location":"debugging/#Type-Instability-/-Global-Variables","page":"Debugging Kernels","title":"Type Instability / Global Variables","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Types must be known to be captured and compiled within GPU kernels. Global variables without const are not type-stable, as you could associate a different value later on in a script:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"v = MtlArray(1:1000)\n\nAK.foreachindex(v) do i\n    v[i] *= 2\nend\n\nv = \"potato\"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"The error stack is a bit more difficult here:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"(Image: Type Unstable Error)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"You see a few dynamic function invocation, an unsupported call to gpu_malloc, and a bit further down a box. The more operations you do on the type-unstable object, the more dynamic function invocation errors you'll see. These would also be the steps Base Julia would take to allow dynamically-changing objects: they'd be put in a Box behind pointers, and allocated on the heap. 
In a way, it is better that we cannot do that on a GPU, as it hurts performance massively.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"There are two ways to solve this - if you really want to use global variables in a script, put them behind a const:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"const v = MtlArray(1:1000)\n\nAK.foreachindex(v) do i\n    v[i] *= 2\nend\n\n# This would give you an error now\n# v = \"potato\"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Or better, use functions:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v, x)\n    AK.foreachindex(v) do i\n        v[i] *= x\n    end\nend\n\nv = MtlArray(1:1000)\nmymul!(v, 2)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note that Julia's lambda capture is very powerful - inside AK.foreachindex you can references other objects from within the function (like x), without explicitly passing them to the GPU.","category":"page"},{"location":"debugging/#Apple-Metal-Only:-Float64-is-not-Supported","page":"Debugging Kernels","title":"Apple Metal Only: Float64 is not Supported","text":"","category":"section"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Mac GPUs do not natively support Float64 values; there is a high-level check when trying to create an array:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"julia> x = MtlArray([1.0, 2.0, 3.0])\nERROR: Metal does not support Float64 values, try using Float32 instead","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"However, if we tried to use / convert values in a kernel to a 
Float64:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"function mymul!(v, x)\n    AK.foreachindex(v) do i\n        v[i] *= x\n    end\nend\n\nv = MtlArray{Float32}(1:1000)\nmymul!(v, 2.0)","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Note that we try to multiply Float32 values by 2.0, which is a Float64 - in which case we get:","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"ERROR: LoadError: Compilation to native code failed; see below for details.\n[...]\ncaused by: NSError: Compiler encountered an internal error (AGXMetalG15X_M1, code 3)\n[...]","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"Change the 2.0 to 2.0f0 or Float32(2); in kernels with generic types (that are supposed to work on multiple possible input types), do use the same types as your inputs, using e.g. T = eltype(v) then zero(T), T(42), etc.","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"","category":"page"},{"location":"debugging/","page":"Debugging Kernels","title":"Debugging Kernels","text":"For other library-related problems, feel free to post a GitHub issue. For help implementing new code, or just advice, you can also use the Julia Discourse forum, the community is incredibly helpful.","category":"page"},{"location":"api/predicates/#Predicates","page":"Predicates","title":"Predicates","text":"","category":"section"},{"location":"api/predicates/","page":"Predicates","title":"Predicates","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.9. 
`all` / `any`\") # hide","category":"page"},{"location":"api/predicates/","page":"Predicates","title":"Predicates","text":"Note on the cooperative keyword: some older platforms crash when multiple threads write to the same memory location in a global array (e.g. old Intel Graphics); if all threads were to write the same value, it is well-defined on others (e.g. CUDA F4.2 says \"If a non-atomic instruction executed by a warp writes to the same location in global memory for more than one of the threads of the warp, only one thread performs a write and which thread does it is undefined.\"). This \"cooperative\" thread behaviour allows for a faster implementation; if you have a platform - the only one I know is Intel UHD Graphics - that crashes, set cooperative=false to use a safer mapreduce-based implementation.","category":"page"},{"location":"testing/#Testing","page":"Testing","title":"Testing","text":"","category":"section"},{"location":"testing/","page":"Testing","title":"Testing","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 7. Testing\") # hide","category":"page"},{"location":"api/mapreduce/#MapReduce","page":"MapReduce","title":"MapReduce","text":"","category":"section"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.6. 
`mapreduce`\") # hide","category":"page"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"","category":"page"},{"location":"api/mapreduce/","page":"MapReduce","title":"MapReduce","text":"AcceleratedKernels.mapreduce","category":"page"},{"location":"api/mapreduce/#AcceleratedKernels.mapreduce","page":"MapReduce","title":"AcceleratedKernels.mapreduce","text":"mapreduce(\n    f, op, src::AbstractArray;\n    init,\n    dims::Union{Nothing, Int}=nothing,\n\n    # CPU settings\n    scheduler=:static,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size::Int=256,\n    temp::Union{Nothing, AbstractArray}=nothing,\n    switch_below::Int=0,\n)\n\nReduce src along dimensions dims using the binary operator op after applying f elementwise. If dims is nothing, reduce src to a scalar. If dims is an integer, reduce src along that dimension. The init value is used as the initial value for the reduction (i.e. after mapping).\n\nCPU settings\n\nThe scheduler can be one of the OhMyThreads.jl schedulers, i.e. :static, :dynamic, :greedy or :serial. Assuming the workload is uniform (as the GPU algorithm prefers), :static is used by default; if you need fine-grained control over your threads, consider using OhMyThreads.jl directly.\n\nUse at most max_tasks threads with at least min_elems elements per task.\n\nGPU settings\n\nThe block_size parameter controls the number of threads per block.\n\nThe temp parameter can be used to pass a pre-allocated temporary array. For reduction to a scalar (dims=nothing), length(temp) >= 2 * (length(src) + 2 * block_size - 1) ÷ (2 * block_size) is required. For reduction along a dimension (dims is an integer), temp is used as the destination array, and thus must have the exact dimensions required - i.e. 
same dimensionwise sizes as src, except for the reduced dimension which becomes 1; there are some corner cases when one dimension is zero, check against Base.reduce for CPU arrays for exact behavior.\n\nThe switch_below parameter controls the threshold below which the reduction is performed on the CPU and is only used for 1D reductions (i.e. dims=nothing).\n\nExample\n\nComputing a sum of squares, reducing down to a scalar that is copied to host:\n\nimport AcceleratedKernels as AK\nusing CUDA\n\nv = CuArray{Int16}(rand(1:1000, 100_000))\nvsumsq = AK.mapreduce(x -> x * x, (x, y) -> x + y, v; init=zero(eltype(v)))\n\nComputing dimensionwise sums of squares in a 2D matrix:\n\nimport AcceleratedKernels as AK\nusing Metal\n\nf(x) = x * x\nm = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))\nmrowsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=1)\nmcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2)\n\n\n\n\n\n","category":"function"},{"location":"api/using_backends/#Using-Different-Backends","page":"Using Different Backends","title":"Using Different Backends","text":"","category":"section"},{"location":"api/using_backends/","page":"Using Different Backends","title":"Using Different Backends","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.1. 
Using Different Backends\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"(Image: Logo)","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"Parallel algorithm building blocks for the Julia ecosystem, targeting multithreaded CPUs, and GPUs via Intel oneAPI, AMD ROCm, Apple Metal and Nvidia CUDA (and any future backends added to the JuliaGPU organisation).","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#What's-Different?","page":"Overview","title":"What's Different?","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 1. What's Different?\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#Status","page":"Overview","title":"Status","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 2. Status\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#Acknowledgements","page":"Overview","title":"Acknowledgements","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"## 11. Acknowledgements\") # hide","category":"page"},{"location":"","page":"Overview","title":"Overview","text":"","category":"page"},{"location":"#License","page":"Overview","title":"License","text":"","category":"section"},{"location":"","page":"Overview","title":"Overview","text":"AcceleratedKernels.jl is MIT-licensed. 
Enjoy.","category":"page"},{"location":"api/reduce/#Reductions","page":"Reduce","title":"Reductions","text":"","category":"section"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"import AcceleratedKernels as AK # hide\nAK.DocHelpers.readme_section(\"### 5.5. `reduce`\") # hide","category":"page"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"","category":"page"},{"location":"api/reduce/","page":"Reduce","title":"Reduce","text":"AcceleratedKernels.reduce","category":"page"},{"location":"api/reduce/#AcceleratedKernels.reduce","page":"Reduce","title":"AcceleratedKernels.reduce","text":"reduce(\n    op, src::AbstractArray;\n    init,\n    dims::Union{Nothing, Int}=nothing,\n\n    # CPU settings\n    scheduler=:static,\n    max_tasks=Threads.nthreads(),\n    min_elems=1,\n\n    # GPU settings\n    block_size::Int=256,\n    temp::Union{Nothing, AbstractGPUArray}=nothing,\n    switch_below::Int=0,\n)\n\nReduce src along dimensions dims using the binary operator op. If dims is nothing, reduce src to a scalar. If dims is an integer, reduce src along that dimension. The init value is used as the initial value for the reduction.\n\nCPU settings\n\nThe scheduler can be one of the OhMyThreads.jl schedulers, i.e. :static, :dynamic, :greedy or :serial. Assuming the workload is uniform (as the GPU algorithm prefers), :static is used by default; if you need fine-grained control over your threads, consider using OhMyThreads.jl directly.\n\nUse at most max_tasks threads with at least min_elems elements per task.\n\nGPU settings\n\nThe block_size parameter controls the number of threads per block.\n\nThe temp parameter can be used to pass a pre-allocated temporary array. For reduction to a scalar (dims=nothing), length(temp) >= 2 * (length(src) + 2 * block_size - 1) ÷ (2 * block_size) is required. For reduction along a dimension (dims is an integer), temp is used as the destination array, and thus must have the exact dimensions required - i.e. 
same dimensionwise sizes as src, except for the reduced dimension which becomes 1; there are some corner cases when one dimension is zero, check against Base.reduce for CPU arrays for exact behavior.\n\nThe switch_below parameter controls the threshold below which the reduction is performed on the CPU and is only used for 1D reductions (i.e. dims=nothing).\n\nExample\n\nComputing a sum, reducing down to a scalar that is copied to host:\n\nimport AcceleratedKernels as AK\nusing CUDA\n\nv = CuArray{Int16}(rand(1:1000, 100_000))\nvsum = AK.reduce((x, y) -> x + y, v; init=zero(eltype(v)))\n\nComputing dimensionwise sums in a 2D matrix:\n\nimport AcceleratedKernels as AK\nusing Metal\n\nm = MtlArray(rand(Int32(1):Int32(100), 10, 100_000))\nmrowsum = AK.reduce(+, m; init=zero(eltype(m)), dims=1)\nmcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2)\n\n\n\n\n\n","category":"function"}]
 }
diff --git a/dev/testing/index.html b/dev/testing/index.html
index 36f7f9e..9eae06a 100644
--- a/dev/testing/index.html
+++ b/dev/testing/index.html
@@ -1,8 +1,8 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Testing · AcceleratedKernels.jl</title><meta name="title" content="Testing · AcceleratedKernels.jl"/><meta property="og:title" content="Testing · AcceleratedKernels.jl"/><meta property="twitter:title" content="Testing · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a 
class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li></ul></li><li class="is-active"><a class="tocitem" href>Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Testing</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Testing</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/testing.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" 
id="documenter-page"><h1 id="Testing"><a class="docs-heading-anchor" href="#Testing">Testing</a><a id="Testing-1"></a><a class="docs-heading-anchor-permalink" href="#Testing" title="Permalink"></a></h1><div class="markdown"><p>If it ain&#39;t tested, it&#39;s broken. The <code>test/runtests.jl</code> suite does randomised correctness testing on all algorithms in the library. To test locally, execute:</p>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Testing · AcceleratedKernels.jl</title><meta name="title" content="Testing · AcceleratedKernels.jl"/><meta property="og:title" content="Testing · AcceleratedKernels.jl"/><meta property="twitter:title" content="Testing · AcceleratedKernels.jl"/><meta name="description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:description" content="Documentation for AcceleratedKernels.jl."/><meta property="twitter:description" content="Documentation for AcceleratedKernels.jl."/><meta property="og:url" content="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><meta property="twitter:url" content="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><link rel="canonical" href="https://juliagpu.github.io/KernelAbstractions.jl/testing/"/><script data-outdated-warner src="../assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../search_index.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link 
class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-mocha.css" data-theme-name="catppuccin-mocha"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-macchiato.css" data-theme-name="catppuccin-macchiato"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-frappe.css" data-theme-name="catppuccin-frappe"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/catppuccin-latte.css" data-theme-name="catppuccin-latte"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><a class="docs-logo" href="../"><img src="../assets/logo.png" alt="AcceleratedKernels.jl logo"/></a><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li><a class="tocitem" href="../">Overview</a></li><li><a class="tocitem" href="../benchmarks/">Benchmarks</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><span class="tocitem">Manual</span><ul><li><a class="tocitem" href="../api/using_backends/">Using Different Backends</a></li><li><a class="tocitem" href="../api/foreachindex/">General Loops</a></li><li><a class="tocitem" href="../api/map/">Map</a></li><li><a class="tocitem" href="../api/sort/">Sorting</a></li><li><a class="tocitem" href="../api/reduce/">Reduce</a></li><li><a class="tocitem" href="../api/mapreduce/">MapReduce</a></li><li><a class="tocitem" href="../api/accumulate/">Accumulate</a></li><li><a 
class="tocitem" href="../api/binarysearch/">Binary Search</a></li><li><a class="tocitem" href="../api/predicates/">Predicates</a></li><li><a class="tocitem" href="../api/custom_structs/">Custom Structs</a></li><li><a class="tocitem" href="../api/task_partition/">Task Partitioning</a></li><li><a class="tocitem" href="../api/utilities/">Utilities</a></li></ul></li><li class="is-active"><a class="tocitem" href>Testing</a></li><li><a class="tocitem" href="../debugging/">Debugging Kernels</a></li><li><a class="tocitem" href="../roadmap/">Roadmap</a></li><li><a class="tocitem" href="../references/">References</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Testing</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Testing</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl" title="View the repository on GitHub"><span class="docs-icon fa-brands"></span><span class="docs-label is-hidden-touch">GitHub</span></a><a class="docs-navbar-link" href="https://github.com/JuliaGPU/AcceleratedKernels.jl/blob/main/docs/src/testing.md" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" 
title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 id="Testing"><a class="docs-heading-anchor" href="#Testing">Testing</a><a id="Testing-1"></a><a class="docs-heading-anchor-permalink" href="#Testing" title="Permalink"></a></h1><div class="markdown"><p>If it ain&#39;t tested, it&#39;s broken. The <code>test/runtests.jl</code> suite does randomised correctness testing on all algorithms in the library. To test locally, execute:</p>
 <pre><code class="language-bash">&#36;&gt; julia -e &#39;import Pkg; Pkg.develop&#40;path&#61;&quot;path/to/AcceleratedKernels.jl&quot;&#41;; Pkg.add&#40;&quot;oneAPI&quot;&#41;&#39;
 &#36;&gt; julia -e &#39;import Pkg; Pkg.test&#40;&quot;AcceleratedKernels.jl&quot;, test_args&#61;&#91;&quot;--oneAPI&quot;&#93;&#41;&#39;</code></pre>
 <p>Replace the <code>&quot;--oneAPI&quot;</code> with <code>&quot;--CUDA&quot;</code>, <code>&quot;--AMDGPU&quot;</code> or <code>&quot;--Metal&quot;</code> to test different backends, as available on your machine.</p>
 <p>Leave out to test the CPU backend:</p>
 <pre><code class="language-bash">&#36;&gt; julia -e &#39;import Pkg; Pkg.test&#40;&quot;AcceleratedKernels.jl&quot;&#41;</code></pre>
-</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../api/task_partition/">« Task Partitioning</a><a class="docs-footer-nextpage" href="../debugging/">Debugging Kernels »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.7.0 on <span class="colophon-date" title="Tuesday 12 November 2024 18:42">Tuesday 12 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</div></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../api/utilities/">« Utilities</a><a class="docs-footer-nextpage" href="../debugging/">Debugging Kernels »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option><option value="catppuccin-latte">catppuccin-latte</option><option value="catppuccin-frappe">catppuccin-frappe</option><option value="catppuccin-macchiato">catppuccin-macchiato</option><option value="catppuccin-mocha">catppuccin-mocha</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.8.0 on <span class="colophon-date" title="Wednesday 13 November 2024 17:27">Wednesday 13 November 2024</span>. Using Julia version 1.11.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>