diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml index cf517ff0..912eb870 100644 --- a/.github/workflows/draft-pdf.yml +++ b/.github/workflows/draft-pdf.yml @@ -1,4 +1,5 @@ -on: [push] +name: Build JOSS paper, only on dispatch +on: [workflow_dispatch] jobs: paper: diff --git a/.github/workflows/manual_publish.yml b/.github/workflows/manual_publish.yml index 39384293..c885d097 100644 @@ -8,7 +8,10 @@ # assumming this is being done in a repo with trusted publishing permissions in pypi -name: Manually upload Python Package (if auto upload fails) +# There is an automated runner which should upload a new PyPi package but it often fails due to API BS. +# This is here so one doesn't have to republish everything + +name: manually publish pypi package (for trouble shooting) on: workflow_dispatch diff --git a/.github/workflows/verification_man_mpi_numba.yml b/.github/workflows/verification_man_mpi_numba.yml new file mode 100644 index 00000000..9508d38f --- /dev/null +++ b/.github/workflows/verification_man_mpi_numba.yml @@ -0,0 +1,35 @@ +name: Verification Tests (manual execution only) + +on: workflow_dispatch + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] #, "macos-latest" + steps: + - uses: actions/checkout@v3 + - name: Set up python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: debug + run: | + pwd + ls + - uses: mpi4py/setup-mpi@v1 + - name: Install dependencies + run: | + #sudo apt-get install libopenmpi-dev + #pip install numpy numba h5py matplotlib scipy pytest colorama mpi4py ngsolve distinctipy + pip list + pip install -e . 
+ - name: Patch Numba + run : | + bash .github/workflows/patch.sh + - name: Verification Tests - Numba and MPI + run: | + cd test/verification/analytic + python run.py --mode=numba --mpiexec=2 diff --git a/.gitignore b/.gitignore index 2da634a5..fc02ca5f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ __pycache__ *.nbc *.nbi +# GPU cache +__ptxcache__ + # Editor .spyproject *.swp @@ -51,3 +54,4 @@ pytestdebug.log docs/build docs/source/pythonapi/generated/ +*.csv diff --git a/README.md b/README.md index 4190ada2..75604d21 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ ![mcdc_logo v1](https://user-images.githubusercontent.com/26186244/173467190-74d9b09a-ef7d-4f0e-8bdf-4a076de7c43c.svg) [![Build](https://github.com/CEMeNT-PSAAP/MCDC/actions/workflows/mpi_numba_reg.yml/badge.svg)](https://github.com/CEMeNT-PSAAP/MCDC/actions/workflows/mpi_numba_reg.yml) +[![DOI](https://joss.theoj.org/papers/10.21105/joss.06415/status.svg)](https://doi.org/10.21105/joss.06415) [![ReadTheDocs](https://readthedocs.org/projects/mcdc/badge/?version=latest&style=flat)](https://mcdc.readthedocs.org/en/latest/ ) [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) @@ -108,18 +109,21 @@ You can find specifics on how to run these tests locally [here](https://github.c To provide proper attribution to MC/DC, please cite ``` -@inproceedings{var_mc23_mcdc, - Booktitle = {International Conference on Mathematics and Computational Methods Applied to Nuclear Science and Engineering}, - title = {Development of {MC/DC}: a performant, scalable, and portable Python-based {M}onte {C}arlo neutron transport code}, - year = {2023}, - author = {Ilham Variansyah and Joanna Piper Morgan and Kyle E. Niemeyer and Ryan G. 
McClarren}, - address = {Niagara Falls, Ontario, Canada}, - doi={10.48550/arXiv.2305.07636}, -} + @article{morgan2024mcdc, + title = {Monte {Carlo} / {Dynamic} {Code} ({MC}/{DC}): {An} accelerated {Python} package for fully transient neutron transport and rapid methods development}, + author = {Morgan, Joanna Piper and Variansyah, Ilham and Pasmann, Samuel L. and Clements, Kayla B. and Cuneo, Braxton and Mote, Alexander and Goodman, Charles and Shaw, Caleb and Northrop, Jordan and Pankaj, Rohan and Lame, Ethan and Whewell, Benjamin and McClarren, Ryan G. and Palmer, Todd S. and Chen, Lizhong and Anistratov, Dmitriy Y. and Kelley, C. T. and Palmer, Camille J. and Niemeyer, Kyle E.}, + journal = {Journal of Open Source Software}, + volume = {9}, + number = {96}, + year = {2024}, + pages = {6415}, + url = {https://joss.theoj.org/papers/10.21105/joss.06415}, + doi = {10.21105/joss.06415}, + } ``` which should render something like this -Variansyah, Ilham, J. P. Morgan, K. E. Niemeyer, and R. G. McClarren. 2023. “Development of MC/DC: a performant, scalable, and portable Python-based Monte Carlo neutron transport code.” In *International Conference on Mathematics and Computational Methods Applied to Nuclear Science and Engineering*, Niagara Falls, Ontario, Canada. DOI. [10.48550/arXiv.2305.07636](https://doi.org/10.48550/arXiv.2305.07636) +Morgan et al. (2024). Monte Carlo / Dynamic Code (MC/DC): An accelerated Python package for fully transient neutron transport and rapid methods development. Journal of Open Source Software, 9(96), 6415. https://doi.org/10.21105/joss.06415. ## License diff --git a/docs/source/contribution.rst b/docs/source/contribution.rst index 716f324f..8513776c 100644 --- a/docs/source/contribution.rst +++ b/docs/source/contribution.rst @@ -10,6 +10,15 @@ Whether you are here to make a single PR and never return, or want to become a m We have regular developers meetings for any and all who are interested to discuss contributions to this code base. 
This describes the processes of contributing to MC/DC for both internal (CEMeNT) and external developers. +We make contributions to the ``dev`` branch of MC/DC. +To get started making alterations in a cloned repo + +#. fork ``CEMeNT-PSAAP/MCDC`` to your github account +#. ``git clone git@github.com:/MCDC.git`` +#. ``git switch dev`` +#. run install script which will install MC/DC as an editable package from this directory + +Push some particles around!!!! Please note our `code of conduct `_ which we take seriously @@ -88,6 +97,17 @@ Alteratively a developer could delete the ``__pycache__`` directory or other cac At some point MC/DC will enable `Numba's Ahead of Time compilation abilities `_. But the core development team is holding off until scheduled `upgrades to AOT functionality in Numba are implemented `_. However if absolutely required by users numba does allow for some `cache sharing `_. +------------------ +Adding a New Input +------------------ + +To add a new keyword argument such that a user can interface with it in an input deck +there are a few different places a dev will need to make alterations + +#. ``card.py`` (where the input cards are actually defined) +#. ``type.py`` (where the type information of the inputs are strictly added) +#. ``input_.py`` (where user inputs are merged with the specifications in ``card.py`` and ``type.py``) + ------- Testing ------- @@ -103,7 +123,7 @@ Our github based CI runs for, * linux-64 (x86) * osx-64 (x86, intel based macs) -while we do not have continuos integration we have validated MC/DC on other systems. +while we do not have continuous integration we have validated MC/DC on other systems. To run the regression tests locally, navigate to ``\MCDC\tests\regression`` and run, @@ -141,7 +161,7 @@ Adding Documentation It's not everything it needs to be but we are trying! 
-If your contribution changes the behavior of the input deck, instillation process, or testing infrastructure your contribution must include alteration to this documentaiton. +If your contribution changes the behavior of the input deck, installation process, or testing infrastructure your contribution must include alteration to this documentation. That can be done by editing the RST files in ``/MCDC/docs/source/.rst``. To add a new page to the documentation, @@ -164,6 +184,8 @@ Pull Requests MC/DC works off of a fork workflow in which contributors fork our repo, make alterations, and submit a pull requests. You should only submit a pull request once your code passes all tests, is properly linted, you have edited documentation (if necessary), and added any new tests (if needed). +Open a PR to the ``dev`` branch in GitHub. +MC/DC's main branch is only updated for version releases at which time a PR from dev to main is opened, tagged, archived, and published automatically. Within your pull request documentation please list: diff --git a/docs/source/index.rst b/docs/source/index.rst index 4a423949..bc0be10b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -83,7 +83,9 @@ A full exhaustive list of publications can be found on the `CEMeNT site `_ diff --git a/docs/source/pubs.rst b/docs/source/pubs.rst new file mode 100644 index 00000000..fdf141e8 --- /dev/null +++ b/docs/source/pubs.rst @@ -0,0 +1,149 @@ +.. _pubs: + +============= +Publications +============= + +This page contains various formal publications describing the features and algorithms of MC/DC. +If you are writing a research paper please provide proper attribution to those whose work you are using in MC/DC. +Generally, the citation on the home page is a good place to start. + +General MC/DC Papers +--------------------- + +I. Variansyah, J. P. Morgan, J. Northrop, K. E. Niemeyer, and +R. G. McClarren. 
“Development of MC/DC: a performant, scalable, +and portable Python-based Monte Carlo neutron transport code.” +In International Conference on Mathematics and Computational +Methods Applied to Nuclear Science and Engineering. Niagara Falls, +Ontario, Canada (2023). Preprint DOI 10.48550/arXiv.2305.13555. + +I. Variansyah and R. G. McClarren. “High-fidelity treatment for object +movement in time-dependent Monte Carlo transport simulations.” +In International Conference on Mathematics and Computational +Methods Applied to Nuclear Science and Engineering. Niagara Falls, +Ontario, Canada (2023). Preprint DOI 10.48550/arXiv.2305.07641. + + +Population Control Publications +-------------------------------- + +I. Variansyah and R. G. McClarren. “An effective initial particle +sampling technique for Monte Carlo reactor transient simulations.” +In International Conference on Mathematics and Computational Methods +Applied to Nuclear Science and Engineering. Niagara Falls, Ontario, +Canada (2023). Preprint DOI 10.48550/arXiv.2305.07646. + +I. Variansyah and R. G. McClarren. “An effective initial particle sampling +technique for Monte Carlo reactor transient simulations.” In International +Conference on Physics of Reactors. Pittsburgh, Pennsylvania, USA (2022). + +I. Variansyah and R. G. McClarren. “Performance of Population Control +Techniques in Monte Carlo Reactor Criticality Simulation.” In International +Conference on Physics of Reactors. Pittsburgh, Pennsylvania, USA (2022). + +Software Engineering in MC/DC Publications +------------------------------------------- + +J. P. Morgan, I. Variansyah, S. Pasmann, K. B. Clements, B. Cuneo, A. Mote, +C. Goodman, C. Shaw, J. Northrop, R. Pankaj, E. Lame, B. Whewell, +R. McClarren, T. S. Palmer, L. Chen, D. Anistratov, C. T. Kelley, +C. Palmer, and K. E. Niemeyer. Monte Carlo / Dynamic Code (MC/DC): +An accelerated Python package for fully transient neutron transport +and rapid methods development. 
Accepted Journal of Open Source Software. +9(96), 6415. DOI 10.21105/joss.06415. + +B. Cuneo and Mike Bailey. 2023. Divergence Reduction in Monte Carlo +Neutron Transport with On-GPU Asynchronous Scheduling. ACM Trans. +Model. Comput. Simul. (October 2023). DOI 10.1145/3626957. + +J. P. Morgan, T. S. Palmer, and K. E. Niemeyer. “Automatic Hardware Code Generation +for Neutron Transport Applications.” In Transactions of the American Nuclear Society, +volume 126, p. 318–320. American Nuclear Society, Anaheim, CA (2022) +Preprint DOI: 10.5281/zenodo.6646813 DOI: 10.13182/T126-38137. + +UQ Publications +--------------- + +K. B. Clements, G. Geraci, A. J. Olson, and T. S. Palmer. +A variance deconvolution estimator for efficient uncertainty +quantification in Monte Carlo radiation transport applications. +Accepted Journal of Quantitative Spectroscopy and Radiative Transfer. +(2024). DOI 10.1016/j.jqsrt.2024.108958. + +K. B. Clements, G. Geraci, A. J. Olson, and T. S. Palmer. +“Global Sensitivity Analysis in Monte Carlo Radiation Transport.” +In International Conference on Mathematics and Computational Methods +Applied to Nuclear Science and Engineering. Niagara Falls, Ontario, Canada (2023) + +K. B. Clements, G. Geraci, and A. J. Olson, A Variance Deconvolution Approach +to Sampling Uncertainty Quantification for Monte Carlo Radiation Transport +Solvers, in Computer Science Research Institute Summer Proceedings 2021, +J.D. Smith and E. Galvan, eds., Technical Report SAND2022-0653R, +Sandia National Laboratories, 2021, pp. 293–307. DOI: 10.2172/1855061. + +Hybrid Monte Carlo Publications +------------------------------- + +B. Whewell, R. G. McClarren, C. D. Hauck & M. Shin “Multigroup Neutron Transport +Using a Collision-Based Hybrid Method”, Nuclear Science and Engineering, +197:7, 1386-1405, (2023) DOI: 10.1080/00295639.2022.2154119. + +E. Smith, I. Variansyah, and R. G. McClarren. 
+“Variable Dynamic Mode Decomposition for Estimating Time Eigenvalues +in Nuclear Systems.” Nuclear Science and Engineering (2022). +DOI 10.1080/00295639.2022.2142025, Preprint. + +V. Novellino and D. Anistratov, Analysis of Hybrid MC/Deterministic Methods +for Transport Problems Based on Low-Order Equations Discretized by +Finite Volume Scheme. Transaction of American Nuclear Society, +v. 130, 2024 Preprint DOI: 10.48550/arXiv.2403.05673 + +E. Smith, I. Variansyah, and R. G. McClarren. “Compressed Dynamic Mode Decomposition +for Time-Eigenvalue Calculations.” In International Conference on Mathematics +and Computational Methods Applied to Nuclear Science and Engineering. +Niagara Falls, Ontario, Canada (2023). Preprint DOI 10.48550/arXiv.2208.10942. + +iQMC Publications +----------------- + +S. Pasmann, I. Variansyah, C. T. Kelley, and R. G. McClarren. (2024). +Mitigating Spatial Error in the iterative-Quasi-Monte Carlo (iQMC) Method +for Neutron Transport Simulations with Linear Discontinuous Source Tilting +and Effective Scattering and Fission Rate Tallies. Accepted Nuclear Science +and Engineering. Preprint DOI 10.48550/arXiv.2401.04029 + +S. Pasmann, I. Variansyah, C. T. Kelley, and R. G. McClarren. +“A Quasi-Monte Carlo Method with Krylov Linear Solvers for Multigroup +Neutron Transport Simulations.” Nuclear Science and Engineering ( +Jan 2023). DOI 10.1080/00295639.2022.2143704. + +S. Pasmann, I. Variansyah, C. T. Kelley, and R. G. McClarren. +“iQMC: Iterative Quasi-Monte Carlo with Krylov Linear Solvers +for k-Eigenvalue Neutron Transport Simulations.” In International +Conference on Mathematics and Computational Methods Applied to +Nuclear Science and Engineering. Niagara Falls, Ontario, Canada +(2023). Preprint DOI: 10.48550/arXiv.2306.11600. + +S. Pasmann, I. Variansyah, and R. G. McClarren. +“Convergent Transport Source Iteration Calculations +with Quasi-Monte Carlo.” In Transactions of the American Nuclear Society, +volume 124, pp. 192–195. 
American Nuclear Society (2021). + +Validation and Verification Publications +---------------------------------------- + +C. J. Palmer, J. Northrop, T. S. Palmer, A. J. Reynolds. +Validation of Time-dependent Shift using the Pulsed Sphere +Benchmarks. Frontiers in Nuclear Engineering, +Sec. Fission and Reactor Design. Vol 2. (2023). DOI: 10.3389/fnuen.2023.1294583. + +J. Northrop, C. Palmer, and A. J. Reynolds. “Inter-code Comparison of Time Independent +Pulsed Sphere Benchmark Results.” In Transactions of the American Nuclear Society, +volume 126. p. 334-337 American Nuclear Society, Anaheim, CA (2022). +Preprint DOI: 10.5281/zenodo.7250603 DOI 10.13182/T126-38312. + +A. J. Reynolds, & T. S. Palmer. Verification and Scaling of Time-Dependent +Shift Using the AZURV1 Benchmark. In Transactions of the +American Nuclear Society, volume 126. p. 310-313. Anaheim, +California, United States (2020) Preprint DOI 10.5281/zenodo.7222601 DOI 10.13182/T126-38060. diff --git a/docs/source/pythonapi/index.rst b/docs/source/pythonapi/index.rst index 31a5e32f..3e532e0c 100644 --- a/docs/source/pythonapi/index.rst +++ b/docs/source/pythonapi/index.rst @@ -27,12 +27,10 @@ Defining geometry :nosignatures: :template: omcfunction.rst - mcdc.surface mcdc.cell - mcdc.universe mcdc.lattice - - + mcdc.surface + mcdc.universe Defining simulation ------------------- @@ -42,12 +40,10 @@ Defining simulation :nosignatures: :template: omcfunction.rst - mcdc.source - mcdc.tally mcdc.eigenmode mcdc.setting - - + mcdc.source + mcdc.tally Defining techniques ------------------- @@ -57,16 +53,20 @@ Defining techniques :nosignatures: :template: omcfunction.rst + mcdc.branchless_collision + mcdc.domain_decomposition + mcdc.dsm + mcdc.IC_generator + mcdc.iQMC mcdc.implicit_capture - mcdc.weighted_emission mcdc.population_control - mcdc.branchless_collision mcdc.time_census - mcdc.weight_window - mcdc.iQMC + mcdc.weighted_emission mcdc.weight_roulette - mcdc.IC_generator - mcdc.dsm + mcdc.weight_window + 
+ + diff --git a/docs/source/theory/ana.rst b/docs/source/theory/ana.rst new file mode 100644 index 00000000..b159509a --- /dev/null +++ b/docs/source/theory/ana.rst @@ -0,0 +1,7 @@ +.. _ana: + +============================ +Acceleration and Abstraction +============================ + +blah diff --git a/docs/source/theory/compressed_sensing.rst b/docs/source/theory/compressed_sensing.rst new file mode 100644 index 00000000..e9420010 --- /dev/null +++ b/docs/source/theory/compressed_sensing.rst @@ -0,0 +1,7 @@ +.. _compressed_sensing: + +================== +Compressed Sensing +================== + +blah \ No newline at end of file diff --git a/docs/source/theory/cont_energy.rst b/docs/source/theory/cont_energy.rst new file mode 100644 index 00000000..126aa4ed --- /dev/null +++ b/docs/source/theory/cont_energy.rst @@ -0,0 +1,7 @@ +.. _cont_energy: + +================= +Continuous Energy +================= + +blah diff --git a/docs/source/theory/cont_movement.rst b/docs/source/theory/cont_movement.rst new file mode 100644 index 00000000..3ba36c11 --- /dev/null +++ b/docs/source/theory/cont_movement.rst @@ -0,0 +1,7 @@ +.. _cont_movement: + +=================== +Continuous Movement +=================== + +blah \ No newline at end of file diff --git a/docs/source/theory/domain_decomp.rst b/docs/source/theory/domain_decomp.rst new file mode 100644 index 00000000..47fc3702 --- /dev/null +++ b/docs/source/theory/domain_decomp.rst @@ -0,0 +1,15 @@ +.. _dd: + +==================== +Domain Decomposition +==================== + +blah + +This is an equation as a separate call out: + +.. math:: + + \frac{ \sum_{t=0}^{N}f(t,k) }{N} + +and this is an equation inlined: :math:`\frac{ \sum_{t=0}^{N}f(t,k) }{N}`. Be careful about the number of returns with callouts! \ No newline at end of file diff --git a/docs/source/theory/gpu.rst b/docs/source/theory/gpu.rst new file mode 100644 index 00000000..0b73a747 --- /dev/null +++ b/docs/source/theory/gpu.rst @@ -0,0 +1,7 @@ +.. 
_gpu: + +================= +GPU Functionality +================= + +blah diff --git a/docs/source/theory/index.rst b/docs/source/theory/index.rst new file mode 100644 index 00000000..f4273fcb --- /dev/null +++ b/docs/source/theory/index.rst @@ -0,0 +1,29 @@ +.. _theory: + +============ +Theory Guide +============ + +We provided a brief theory guide into some of the novel functions and features in MC/DC. +Both for algorithms and simulation/compilation schemes. + +For a more basic primer of Monte Carlo neutron transport and methods we suggest exploring the `OpenMC theory guide `_ . + +.. only:: html + + -------- + Contents + -------- + +.. toctree:: + :maxdepth: 1 + + gpu + ana + iqmc + ww + uq + compressed_sensing + cont_energy + domain_decomp + cont_movement diff --git a/docs/source/theory/iqmc.rst b/docs/source/theory/iqmc.rst new file mode 100644 index 00000000..7f3b018f --- /dev/null +++ b/docs/source/theory/iqmc.rst @@ -0,0 +1,7 @@ +.. _iqmc: + +====== +iQMC +====== + +blah \ No newline at end of file diff --git a/docs/source/theory/uq.rst b/docs/source/theory/uq.rst new file mode 100644 index 00000000..0a3e7771 --- /dev/null +++ b/docs/source/theory/uq.rst @@ -0,0 +1,7 @@ +.. _uq: + +========================== +Uncertainty Quantification +========================== + +blah diff --git a/docs/source/theory/ww.rst b/docs/source/theory/ww.rst new file mode 100644 index 00000000..9ec659f0 --- /dev/null +++ b/docs/source/theory/ww.rst @@ -0,0 +1,7 @@ +.. 
_ww: + +=============== +Weight Windows +=============== + +blah \ No newline at end of file diff --git a/examples/fixed_source/slab_reed_dd/input.py b/examples/fixed_source/slab_reed_dd/input.py new file mode 100644 index 00000000..167e102b --- /dev/null +++ b/examples/fixed_source/slab_reed_dd/input.py @@ -0,0 +1,51 @@ +import numpy as np + +import mcdc + +# ============================================================================= +# Set model +# ============================================================================= +# Three slab layers with different materials +# Based on William H. Reed, NSE (1971), 46:2, 309-314, DOI: 10.13182/NSE46-309 + +# Set materials +m1 = mcdc.material(capture=np.array([50.0])) +m2 = mcdc.material(capture=np.array([5.0])) +m3 = mcdc.material(capture=np.array([0.0])) # Vacuum +m4 = mcdc.material(capture=np.array([0.1]), scatter=np.array([[0.9]])) + +# Set surfaces +s1 = mcdc.surface("plane-z", z=0.0, bc="reflective") +s2 = mcdc.surface("plane-z", z=2.0) +s3 = mcdc.surface("plane-z", z=3.0) +s4 = mcdc.surface("plane-z", z=5.0) +s5 = mcdc.surface("plane-z", z=8.0, bc="vacuum") + +# Set cells +mcdc.cell([+s1, -s2], m1) +mcdc.cell([+s2, -s3], m2) +mcdc.cell([+s3, -s4], m3) +mcdc.cell([+s4, -s5], m4) + +# ============================================================================= +# Set source +# ============================================================================= + +# Isotropic source in the absorbing medium +mcdc.source(z=[0.0, 2.0], isotropic=True, prob=50.0) + +# Isotropic source in the first half of the outermost medium, +# with 1/100 strength +mcdc.source(z=[5.0, 6.0], isotropic=True, prob=0.5) + +# ============================================================================= +# Set tally, setting, and run mcdc +# ============================================================================= + +mcdc.tally(scores=["flux"], z=np.linspace(0.0, 8.0, 81)) + +# Setting +mcdc.setting(N_particle=5000) 
+mcdc.domain_decomposition(z=np.linspace(0.0, 8.0, 5)) +# Run +mcdc.run() diff --git a/examples/fixed_source/slab_reed_dd/process.py b/examples/fixed_source/slab_reed_dd/process.py new file mode 100644 index 00000000..09799cc3 --- /dev/null +++ b/examples/fixed_source/slab_reed_dd/process.py @@ -0,0 +1,123 @@ +import numpy as np +import matplotlib.pyplot as plt +import h5py +from scipy.integrate import quad + +# ============================================================================= +# Reference solution (not accurate enough for N_hist > 1E7) +# ============================================================================= + + +def phi1(x): + return ( + 1.0 + - 5.96168047527760 * 10 ** (-47) * np.cosh(52.06761235859028 * x) + - 6.78355315350872 * 10 ** (-56) * np.cosh(62.76152118553390 * x) + - 7.20274049646598 * 10 ** (-84) * np.cosh(95.14161078659372 * x) + - 6.34541150517664 * 10 ** (-238) * np.cosh(272.5766481169758 * x) + ) + + +def phi2(x): + return ( + 1.685808767651539 * 10**3 * np.exp(-5.206761235859028 * x) + + 3.143867366942945 * 10**4 * np.exp(-6.276152118553390 * x) + + 2.879977113018352 * 10**7 * np.exp(-9.514161078659372 * x) + + 8.594190506002560 * 10**22 * np.exp(-27.25766481169758 * x) + + 1.298426035202193 * 10 ** (-36) * np.exp(27.25766481169758 * x) + + 1.432344656303454 * 10 ** (-13) * np.exp(9.514161078659372 * x) + + 1.514562265056083 * 10 ** (-9) * np.exp(6.276152118553390 * x) + + 1.594431209450755 * 10 ** (-8) * np.exp(5.206761235859028 * x) + ) + + +def phi3(x): + return 1.105109108062394 + + +def phi4(x): + return ( + 10.0 + - 0.1983746883968300 * np.exp(0.5254295183311557 * x) + - 7.824765332896027 * 10 ** (-5) * np.exp(1.108937229227813 * x) + - 9.746660212187006 * 10 ** (-6) * np.exp(1.615640334315550 * x) + - 2.895098351422132 * 10 ** (-13) * np.exp(4.554850586269065 * x) + - 75.34793864805979 * np.exp(-0.5254295183311557 * x) + - 20.42874998426011 * np.exp(-1.108937229227813 * x) + - 7.129175418204712 * 10 ** (2) * 
np.exp(-1.615640334315550 * x) + - 2.716409367577795 * 10 ** (9) * np.exp(-4.554850586269065 * x) + ) + + +def phi5(x): + return ( + 31.53212162577067 * np.exp(-0.5254295183311557 * x) + + 26.25911060454856 * np.exp(-1.108937229227813 * x) + + 1.841223066417334 * 10 ** (3) * np.exp(-1.615640334315550 * x) + + 1.555593549394869 * 10 ** (11) * np.exp(-4.554850586269065 * x) + - 3.119310353653182 * 10 ** (-3) * np.exp(0.5254295183311557 * x) + - 6.336401143340483 * 10 ** (-7) * np.exp(1.108937229227813 * x) + - 3.528757679361232 * 10 ** (-8) * np.exp(1.615640334315550 * x) + - 4.405514335746888 * 10 ** (-18) * np.exp(4.554850586269065 * x) + ) + + +def f_phi(x1, x2): + dx = x2 - x1 + if x2 <= 2.0: + return quad(phi1, x1, x2)[0] / dx + if x2 <= 3.0: + return quad(phi2, x1, x2)[0] / dx + if x2 <= 5.0: + return quad(phi3, x1, x2)[0] / dx + if x2 <= 6.0: + return quad(phi4, x1, x2)[0] / dx + return quad(phi5, x1, x2)[0] / dx + + +def f_phi_x(x): + if x <= 2.0: + return phi1(x) + if x <= 3.0: + return phi2(x) + if x <= 5.0: + return phi3(x) + if x <= 6.0: + return phi4(x) + return phi5(x) + + +with h5py.File("output.h5", "r") as f: + x = f["tally/grid/z"][:] + dx = x[1] - x[0] + x_mid = 0.5 * (x[:-1] + x[1:]) + +phi_ref = np.zeros_like(x_mid) +phi_x_ref = np.zeros_like(x) + +for i in range(len(x)): + phi_x_ref[i] = f_phi_x(x[i]) + +for i in range(len(x_mid)): + phi_ref[i] = f_phi(x[i], x[i + 1]) + +# ============================================================================= +# Plot +# ============================================================================= + +# Load output +with h5py.File("output.h5", "r") as f: + # Note the spatial (dx) and source strength (100+1) normalization + phi = f["tally/flux/mean"][:] / dx * 101.0 + phi_sd = f["tally/flux/sdev"][:] / dx * 101.0 + +# Flux - spatial average +plt.plot(x_mid, phi, "-b", label="MC") +plt.fill_between(x_mid, phi - phi_sd, phi + phi_sd, alpha=0.2, color="b") +plt.plot(x_mid, phi_ref, "--r", label="ref.") 
+plt.xlabel(r"$x$, cm") +plt.ylabel("Flux") +plt.grid() +plt.legend() +plt.title(r"$\bar{\phi}_i$") +plt.show() diff --git a/mcdc/__init__.py b/mcdc/__init__.py index 09595dfb..632d644a 100644 --- a/mcdc/__init__.py +++ b/mcdc/__init__.py @@ -23,6 +23,7 @@ uq, print_card, reset_cards, + domain_decomposition, ) from mcdc.main import run, prepare from mcdc.visualizer import visualize diff --git a/mcdc/adapt.py b/mcdc/adapt.py new file mode 100644 index 00000000..4cb2ac92 --- /dev/null +++ b/mcdc/adapt.py @@ -0,0 +1,541 @@ +import numpy as np +from numba import njit, jit, objmode, literal_unroll, cuda, types +from numba.extending import intrinsic +import numba +import mcdc.type_ as type_ +import mcdc.kernel as kernel +import mcdc.loop as loop + +try: + import harmonize as harm + + HAS_HARMONIZE = True +except: + HAS_HARMONIZE = False + +import math +import inspect + +from mcdc.print_ import print_error + +import mcdc.adapt as adapt + + +# ============================================================================= +# Error Messangers +# ============================================================================= + + +def unknown_target(target): + print_error(f"ERROR: Unrecognized target '{target}'") + + +# ============================================================================= +# uintp/voidptr casters +# ============================================================================= + + +@intrinsic +def cast_uintp_to_voidptr(typingctx, src): + # check for accepted types + if isinstance(src, types.Integer): + # create the expected type signature + result_type = types.voidptr + sig = result_type(types.uintp) + + # defines the custom code generation + def codegen(context, builder, signature, args): + # llvm IRBuilder code here + [src] = args + rtype = signature.return_type + llrtype = context.get_value_type(rtype) + return builder.inttoptr(src, llrtype) + + return sig, codegen + + +@intrinsic +def cast_voidptr_to_uintp(typingctx, src): + # check for accepted types 
+ if isinstance(src, types.RawPointer): + # create the expected type signature + result_type = types.uintp + sig = result_type(types.voidptr) + + # defines the custom code generation + def codegen(context, builder, signature, args): + # llvm IRBuilder code here + [src] = args + rtype = signature.return_type + llrtype = context.get_value_type(rtype) + return builder.ptrtoint(src, llrtype) + + return sig, codegen + + +# ============================================================================= +# Decorators +# ============================================================================= + +toggle_rosters = {} + +target_rosters = {} + +late_jit_roster = set() + +do_nothing_id = 0 + + +def generate_do_nothing(arg_count, crash_on_call=None): + global do_nothing_id + name = f"do_nothing_{do_nothing_id}" + args = ", ".join([f"arg_{i}" for i in range(arg_count)]) + source = f"def {name}({args}):\n" + if crash_on_call != None: + source += f" assert False, '{crash_on_call}'\n" + else: + source += " pass\n" + exec(source) + result = eval(name) + do_nothing_id += 1 + return result + + +def overwrite_func(func, revised_func): + mod_name = func.__module__ + fn_name = func.__name__ + new_fn_name = revised_func.__name__ + # print(f"Overwriting function {fn_name} in module {mod_name} with {new_fn_name}") + module = __import__(mod_name, fromlist=[fn_name]) + setattr(module, fn_name, revised_func) + + +def toggle(flag): + def toggle_inner(func): + global toggle_rosters + if flag not in toggle_rosters: + toggle_rosters[flag] = [False, []] + toggle_rosters[flag][1].append(func) + return func + + return toggle_inner + + +def set_toggle(flag, val): + toggle_rosters[flag][0] = val + + +def eval_toggle(): + global toggle_rosters + for _, pair in toggle_rosters.items(): + val = pair[0] + roster = pair[1] + for func in roster: + if val: + overwrite_func(func, numba.njit(func)) + else: + global do_nothing_id + name = func.__name__ + # print(f"do_nothing_{do_nothing_id} for {name}") + 
arg_count = len(inspect.signature(func).parameters) + overwrite_func(func, numba.njit(generate_do_nothing(arg_count))) + + +blankout_roster = {} + + +def blankout_fn(func): + global blankout_roster + + mod_name = func.__module__ + fn_name = func.__name__ + id = (mod_name, fn_name) + + if id not in blankout_roster: + global do_nothing_id + name = func.__name__ + # print(f"do_nothing_{do_nothing_id} for {name}") + arg_count = len(inspect.signature(func).parameters) + blankout_roster[id] = generate_do_nothing( + arg_count, crash_on_call=f"blankout fn for {name} should never be called" + ) + + blank = blankout_roster[id] + + return blank + + +def for_(target, on_target=[]): + # print(f"{target}") + def for_inner(func): + global target_rosters + mod_name = func.__module__ + fn_name = func.__name__ + # print(f"{target} {mod_name} {fn_name}") + params = inspect.signature(func).parameters + if target not in target_rosters: + target_rosters[target] = {} + target_rosters[target][(mod_name, fn_name)] = func + # blank = blankout_fn(func) + param_str = ", ".join(p for p in params) + jit_str = f"def jit_func({param_str}):\n global target_rosters\n return target_rosters['{target}'][('{mod_name}','{fn_name}')]" + # print(jit_str) + exec(jit_str, globals(), locals()) + result = eval("jit_func") + blank = blankout_fn(func) + numba.core.extending.overload(blank, target=target)(result) + return blank + + return for_inner + + +def for_cpu(on_target=[]): + return for_("cpu", on_target=on_target) + + +def for_gpu(on_target=[]): + return for_("gpu", on_target=on_target) + + +def target_for(target): + pass + + +# +# for func in late_jit_roster: +# if target == 'cpu': +# overwrite_func(func,numba.njit) +# elif target == 'gpu': +# overwrite_func(func,numba.cuda.jit) +# else: +# unknown_target(target) +# +# for func, on_target in target_rosters[target].items(): +# transformed = func +# for transformer in on_target: +# transformed = transformer(transformed) +# name = func.__name__ +# 
print(f"Overwriting func {name} for target {target}") +# overwrite_func(func, transformed) + + +def jit_on_target(): + def jit_on_target_inner(func): + late_jit_roster.add(func) + return func + + return jit_on_target_inner + + +def nopython_mode(is_on): + if is_on: + return + if not isinstance(target_rosters["cpu"], dict): + return + + for impl in target_rosters["cpu"].values(): + overwrite_func(impl, impl) + + +# Function adapted from Phillip Eller's `myjit` solution to the GPU/CPU array +# problem brought up in https://github.com/numba/numba/issues/2571 +def universal_arrays(target): + def universal_arrays_inner(func): + if target == "gpu": + source = inspect.getsource(func).splitlines() + for idx, line in enumerate(source): + if "@universal_arrays" in line: + source = "\n".join(source[idx + 1 :]) + "\n" + break + source = source.replace("np.empty", "cuda.local.array") + # print(source) + exec(source) + revised_func = eval(func.__name__) + overwrite_func(func, revised_func) + # module = __import__(func.__module__,fromlist=[func.__name__]) + # print(inspect.getsource(getattr(module,func.__name__))) + return revised_func + + return universal_arrays_inner + + +# ============================================================================= +# GPU Type / Extern Functions Forward Declarations +# ============================================================================= + + +SIMPLE_ASYNC = True + +none_type = None +mcdc_type = None +state_spec = None +device_gpu = None +group_gpu = None +thread_gpu = None +particle_gpu = None +prep_gpu = None +step_async = None +find_cell_async = None + + +def gpu_forward_declare(): + + global none_type, mcdc_type, state_spec + global device_gpu, group_gpu, thread_gpu + global particle_gpu, particle_record_gpu + global step_async, find_cell_async + + none_type = numba.from_dtype(np.dtype([])) + mcdc_type = numba.from_dtype(type_.global_) + state_spec = (mcdc_type, none_type, none_type) + device_gpu, group_gpu, thread_gpu = 
harm.RuntimeSpec.access_fns(state_spec) + particle_gpu = numba.from_dtype(type_.particle) + particle_record_gpu = numba.from_dtype(type_.particle_record) + + def step(prog: numba.uintp, P: particle_gpu): + pass + + def find_cell(prog: numba.uintp, P: particle_gpu): + pass + + step_async, find_cell_async = adapt.harm.RuntimeSpec.async_dispatch(step, find_cell) + + +# ============================================================================= +# Seperate GPU/CPU Functions to Target Different Platforms +# ============================================================================= + + +@for_cpu() +def device(prog): + return prog + + +@for_gpu() +def device(prog): + return device_gpu(prog) + + +@for_cpu() +def group(prog): + return prog + + +@for_gpu() +def group(prog): + return group_gpu(prog) + + +@for_cpu() +def thread(prog): + return prog + + +@for_gpu() +def thread(prog): + return thread_gpu(prog) + + +@for_cpu() +def add_active(particle, prog): + kernel.add_particle(particle, prog["bank_active"]) + + +@for_gpu() +def add_active(particle, prog): + P = kernel.recordlike_to_particle(particle) + if SIMPLE_ASYNC: + step_async(prog, P) + else: + find_cell_async(prog, P) + + +@for_cpu() +def add_source(particle, prog): + kernel.add_particle(particle, prog["bank_source"]) + + +@for_gpu() +def add_source(particle, prog): + mcdc = device(prog) + kernel.add_particle(particle, mcdc["bank_source"]) + + +@for_cpu() +def add_census(particle, prog): + kernel.add_particle(particle, prog["bank_census"]) + + +@for_gpu() +def add_census(particle, prog): + mcdc = device(prog) + kernel.add_particle(particle, mcdc["bank_census"]) + + +@for_cpu() +def add_IC(particle, prog): + kernel.add_particle(particle, prog["technique"]["IC_bank_neutron_local"]) + + +@for_gpu() +def add_IC(particle, prog): + mcdc = device(prog) + kernel.add_particle(particle, mcdc["technique"]["IC_bank_neutron_local"]) + + +@for_cpu() +def local_translate(): + return np.zeros(1, dtype=type_.translate)[0] + + 
+@for_gpu() +def local_translate(): + trans = cuda.local.array(1, type_.translate)[0] + for i in range(3): + trans["values"][i] = 0 + return trans + + +@for_cpu() +def local_group_array(): + return np.zeros(1, dtype=type_.group_array)[0] + + +@for_gpu() +def local_group_array(): + return cuda.local.array(1, type_.group_array)[0] + + +@for_cpu() +def local_j_array(): + return np.zeros(1, dtype=type_.j_array)[0] + + +@for_gpu() +def local_j_array(): + return cuda.local.array(1, type_.j_array)[0] + + +@for_cpu() +def local_particle(): + return np.zeros(1, dtype=type_.particle)[0] + + +@for_gpu() +def local_particle(): + return cuda.local.array(1, dtype=type_.particle)[0] + + +@for_cpu() +def local_particle_record(): + return np.zeros(1, dtype=type_.particle_record)[0] + + +@for_gpu() +def local_particle_record(): + return cuda.local.array(1, dtype=type_.particle_record)[0] + + +@for_cpu() +def global_add(ary, idx, val): + result = ary[idx] + ary[idx] += val + return result + + +@for_gpu() +def global_add(ary, idx, val): + return cuda.atomic.add(ary, idx, val) + + +@for_cpu() +def global_max(ary, idx, val): + result = ary[idx] + if ary[idx] < val: + ary[idx] = val + return result + + +@for_gpu() +def global_max(ary, idx, val): + return cuda.atomic.max(ary, idx, val) + + +# ========================================================================= +# Program Specifications +# ========================================================================= + +state_spec = None +one_event_fns = None +multi_event_fns = None + + +device_gpu, group_gpu, thread_gpu = None, None, None +iterate_async = None + + +def make_spec(target): + global state_spec, one_event_fns, multi_event_fns + global device_gpu, group_gpu, thread_gpu + global iterate_async + if target == "gpu": + state_spec = (dev_state_type, grp_state_type, thd_state_type) + one_event_fns = [iterate] + # multi_event_fns = [source,move,scattering,fission,leakage,bcollision] + device_gpu, group_gpu, thread_gpu = 
harm.RuntimeSpec.access_fns(state_spec) + (iterate_async,) = harm.RuntimeSpec.async_dispatch(iterate) + elif target != "cpu": + unknown_target(target) + + +@njit +def empty_base_func(prog): + pass + + +def make_gpu_loop( + state_spec, + work_make_fn, + step_fn, + check_fn, + arg_type, + initial_fn=empty_base_func, + final_fn=empty_base_func, +): + async_fn_list = [step_fn] + device_gpu, group_gpu, thread_gpu = harm.RuntimeSpec.access_fns(state_spec) + + def make_work(prog: numba.uintp) -> numba.boolean: + return work_make_fn(prog) + + def initialize(prog: numba.uintp): + initial_fn(prog) + + def finalize(prog: numba.uintp): + final_fn(prog) + + def step(prog: numba.uintp, arg: arg_type): + + step_async() + + (step_async,) = harm.RuntimeSpec.async_dispatch(step) + + pass + + +# ========================================================================= +# Compilation and Main Adapter +# ========================================================================= + + +def compiler(func, target): + if target == "cpu": + return jit(func, nopython=True, nogil=True) # , parallel=True) + elif target == "cpus": + return jit(func, nopython=True, nogil=True, parallel=True) + elif target == "gpu_device": + return cuda.jit(func, device=True) + elif target == "gpu": + return cuda.jit(func) + else: + unknown_target(target) diff --git a/mcdc/card.py b/mcdc/card.py index 3e1dcfd1..ad59ffa5 100644 --- a/mcdc/card.py +++ b/mcdc/card.py @@ -52,6 +52,7 @@ def reset(self): "N_inactive": 0, "N_active": 0, "N_cycle": 0, + "caching": True, "save_particle": False, "gyration_radius": False, "gyration_radius_type": GYRATION_RADIUS_ALL, @@ -80,6 +81,12 @@ def reset(self): "ww": np.ones([1, 1, 1, 1]), "ww_width": 2.5, "ww_mesh": make_card_mesh(), + "domain_decomposition": False, + "dd_idx": 0, + "dd_mesh": make_card_mesh(), + "dd_exchange_rate": 0, + "dd_repro": False, + "dd_work_ratio": np.array([1]), "weight_roulette": False, "wr_threshold": 0.0, "wr_survive": 1.0, @@ -108,9 +115,6 @@ def 
reset(self): "tilt-x": np.zeros([1, 1, 1, 1]), "tilt-y": np.zeros([1, 1, 1, 1]), "tilt-z": np.zeros([1, 1, 1, 1]), - "tilt-xy": np.zeros([1, 1, 1, 1]), - "tilt-xz": np.zeros([1, 1, 1, 1]), - "tilt-yz": np.zeros([1, 1, 1, 1]), "fission-source": np.zeros([1, 1, 1, 1]), }, "score_list": { @@ -120,9 +124,6 @@ def reset(self): "tilt-x": False, "tilt-y": False, "tilt-z": False, - "tilt-xy": False, - "tilt-xz": False, - "tilt-yz": False, "fission-power": False, "fission-source": False, }, diff --git a/mcdc/constant.py b/mcdc/constant.py index d25df2a4..431d123a 100644 --- a/mcdc/constant.py +++ b/mcdc/constant.py @@ -14,6 +14,7 @@ EVENT_TIME_BOUNDARY = 1 << 7 EVENT_LATTICE = 1 << 8 EVENT_SURFACE_MOVE = 1 << 9 +EVENT_DOMAIN = 1 << 10 # Gyration raius type GYRATION_RADIUS_ALL = 0 @@ -38,6 +39,12 @@ PREC = 1.0 + 1e-5 # Precision factor to determine if a distance is smaller BANKMAX = 100 # Default maximum active bank +# Domain Decomp mesh crossing flags +MESH_X = 0 +MESH_Y = 1 +MESH_Z = 2 +MESH_T = 3 + # RNG LCG parameters RNG_G = nb.uint64(2806196910506780709) RNG_C = nb.uint64(1) diff --git a/mcdc/input_.py b/mcdc/input_.py index ec409d61..6e61a446 100644 --- a/mcdc/input_.py +++ b/mcdc/input_.py @@ -1046,6 +1046,9 @@ def setting(**kw): The time edge of the problem, after which all particles will be killed. progress_bar : bool Whether to display the progress bar (default True; disable when running MC/DC in a loop). + caching : bool + Whether to store or delete compiled Numba kernels (default True will store; False will delete existing __pycache__ folder). + see :ref:`Caching`. output_name : str Name of the output file MC/DC should save data in (default "output.h5"). 
save_input_deck : bool @@ -1088,6 +1091,7 @@ def setting(**kw): "IC_file", "active_bank_buff", "census_bank_buff", + "caching", ], False, ) @@ -1106,6 +1110,7 @@ def setting(**kw): IC_file = kw.get("IC_file") bank_active_buff = kw.get("active_bank_buff") bank_census_buff = kw.get("census_bank_buff") + caching = kw.get("caching") # Check if setting card has been initialized card = mcdc.input_deck.setting @@ -1146,6 +1151,10 @@ def setting(**kw): if bank_census_buff is not None: card["bank_census_buff"] = int(bank_census_buff) + # caching is normally enabled + if caching is not None: + card["caching"] = caching + # Particle tracker if particle_tracker is not None: card["track_particle"] = particle_tracker @@ -1388,6 +1397,65 @@ def weight_window(x=None, y=None, z=None, t=None, window=None, width=None): return card +def domain_decomposition( + x=None, + y=None, + z=None, + exchange_rate=100000, + work_ratio=None, + repro=True, +): + """ + Activate domain decomposition. + + Parameters + ---------- + x : array_like[float], optional + Location of subdomain boundaries in x (default None). + y : array_like[float], optional + Location of subdomain boundaries in y (default None). + z : array_like[float], optional + Location of subdomain boundaries in z (default None). + exchange_rate : float, optional + Number of particles to acumulate in the domain banks before sending. + work_ratio : array_like[integer], optional + Number of processors in each domain + + Returns + ------- + A domain decomposition card. 
+
+    """
+    card = mcdc.input_deck.technique
+    card["domain_decomposition"] = True
+    card["dd_exchange_rate"] = int(exchange_rate)
+    card["dd_repro"] = repro
+    dom_num = 1
+    # Set mesh
+    if x is not None:
+        card["dd_mesh"]["x"] = x
+        dom_num *= len(x)
+    if y is not None:
+        card["dd_mesh"]["y"] = y
+        dom_num *= len(y)
+    if z is not None:
+        card["dd_mesh"]["z"] = z
+        dom_num *= len(z)
+    # Set work ratio
+    if work_ratio is None:
+        card["dd_work_ratio"] = None
+    elif work_ratio is not None:
+        card["dd_work_ratio"] = work_ratio
+    card["dd_idx"] = 0
+    card["dd_xp_neigh"] = []
+    card["dd_xn_neigh"] = []
+    card["dd_yp_neigh"] = []
+    card["dd_yn_neigh"] = []
+    card["dd_zp_neigh"] = []
+    card["dd_zn_neigh"] = []
+    return card
+
+
 def iQMC(
     g=None,
     t=None,
@@ -1399,11 +1467,6 @@ def iQMC(
     source_x0=None,
     source_y0=None,
     source_z0=None,
-    source_xy0=None,
-    source_xz0=None,
-    source_yz0=None,
-    source_xyz0=None,
-    fission_source0=None,
     krylov_restart=None,
     fixed_source=None,
     scramble=False,
@@ -1442,16 +1505,6 @@ def iQMC(
         Initial source for tilt-y (default None).
     source_z0 : array_like[float], optional
         Initial source for tilt-z (default None).
-    source_xy0 : array_like[float], optional
-        Initial source for tilt-xy (default None).
-    source_xz0 : array_like[float], optional
-        Initial source for tilt-xz (default None).
-    source_yz0 : array_like[float], optional
-        Initial source for tilt-yz (default None).
-    source_xyz0 : array_like[float], optional
-        Initial source for tilt-xyz (default None).
-    fission_source0 : array_like[float], optional
-        Initial fission source (default None).
     krylov_restart : int, optional
         Max number of iterations for Krylov iteration (default same as maxitt).
fixed_source : array_like[float], optional @@ -1527,9 +1580,6 @@ def iQMC( if source0 is None: source0 = np.zeros_like(phi0) - if eigenmode_solver == "davidson": - card["iqmc"]["krylov_vector_size"] += 1 - score_list = card["iqmc"]["score_list"] for name in score: score_list[name] = True @@ -1549,31 +1599,10 @@ def iQMC( if source_z0 is None: source_z0 = np.zeros_like(phi0) - if score_list["tilt-xy"]: - card["iqmc"]["krylov_vector_size"] += 1 - if source_xy0 is None: - source_xy0 = np.zeros_like(phi0) - - if score_list["tilt-xz"]: - card["iqmc"]["krylov_vector_size"] += 1 - if source_xz0 is None: - source_xz0 = np.zeros_like(phi0) - - if score_list["tilt-yz"]: - card["iqmc"]["krylov_vector_size"] += 1 - if source_yz0 is None: - source_yz0 = np.zeros_like(phi0) - - if fission_source0 is not None: - card["iqmc"]["score"]["fission-source"] = fission_source0 - card["iqmc"]["score"]["flux"] = phi0 card["iqmc"]["score"]["tilt-x"] = source_x0 card["iqmc"]["score"]["tilt-y"] = source_y0 card["iqmc"]["score"]["tilt-z"] = source_z0 - card["iqmc"]["score"]["tilt-xy"] = source_xy0 - card["iqmc"]["score"]["tilt-xz"] = source_xz0 - card["iqmc"]["score"]["tilt-yz"] = source_yz0 card["iqmc"]["source"] = source0 card["iqmc"]["fixed_source"] = fixed_source card["iqmc"]["fixed_source_solver"] = fixed_source_solver diff --git a/mcdc/kernel.py b/mcdc/kernel.py index 074dfc5e..dbb9d58c 100644 --- a/mcdc/kernel.py +++ b/mcdc/kernel.py @@ -7,9 +7,676 @@ import mcdc.type_ as type_ from mcdc.constant import * -from mcdc.print_ import print_error +from mcdc.print_ import print_error, print_msg from mcdc.type_ import score_list, iqmc_score_list from mcdc.loop import loop_source +import mcdc.adapt as adapt +from mcdc.adapt import toggle, for_cpu, for_gpu + +# ============================================================================= +# Domain Decomposition +# ============================================================================= + +# 
============================================================================= +# Domain crossing event +# ============================================================================= + + +@toggle("domain_decomp") +def domain_crossing(P, mcdc): + # Domain mesh crossing + seed = P["rng_seed"] + max_size = mcdc["technique"]["dd_exchange_rate"] + if mcdc["technique"]["domain_decomposition"]: + mesh = mcdc["technique"]["dd_mesh"] + # Determine which dimension is crossed + x, y, z, t, directions = mesh_crossing_evaluate(P, mesh) + if len(directions) == 0: + return + elif len(directions) > 1: + for direction in directions[1:]: + if direction == MESH_X: + P["x"] -= SHIFT * P["ux"] / np.abs(P["ux"]) + if direction == MESH_Y: + P["y"] -= SHIFT * P["uy"] / np.abs(P["uy"]) + if direction == MESH_Z: + P["z"] -= SHIFT * P["uz"] / np.abs(P["uz"]) + flag = directions[0] + # Score on tally + if flag == MESH_X and P["ux"] > 0: + add_particle(P, mcdc["domain_decomp"]["bank_xp"]) + if get_bank_size(mcdc["domain_decomp"]["bank_xp"]) == max_size: + dd_particle_send(mcdc) + if flag == MESH_X and P["ux"] < 0: + add_particle(P, mcdc["domain_decomp"]["bank_xn"]) + if get_bank_size(mcdc["domain_decomp"]["bank_xn"]) == max_size: + dd_particle_send(mcdc) + if flag == MESH_Y and P["uy"] > 0: + add_particle(P, mcdc["domain_decomp"]["bank_yp"]) + if get_bank_size(mcdc["domain_decomp"]["bank_yp"]) == max_size: + dd_particle_send(mcdc) + if flag == MESH_Y and P["uy"] < 0: + add_particle(P, mcdc["domain_decomp"]["bank_yn"]) + if get_bank_size(mcdc["domain_decomp"]["bank_yn"]) == max_size: + dd_particle_send(mcdc) + if flag == MESH_Z and P["uz"] > 0: + add_particle(P, mcdc["domain_decomp"]["bank_zp"]) + if get_bank_size(mcdc["domain_decomp"]["bank_zp"]) == max_size: + dd_particle_send(mcdc) + if flag == MESH_Z and P["uz"] < 0: + add_particle(P, mcdc["domain_decomp"]["bank_zn"]) + if get_bank_size(mcdc["domain_decomp"]["bank_zn"]) == max_size: + dd_particle_send(mcdc) + P["alive"] = False + + +# 
============================================================================= +# Send full domain bank +# ============================================================================= + + +requests = [] + + +def save_request(req_pair): + global requests + + updated_requests = [] + + status = MPI.Status() + for req, buf in requests: + if not req.Test(status): + updated_requests.append((req, buf)) + + updated_requests.append(req_pair) + requests = updated_requests + + +def clear_requests(): + global requests + for req, buf in requests: + req.Free() + + requests = [] + + +@njit +def dd_check_halt(mcdc): + return mcdc["domain_decomp"]["work_done"] + + +@njit +def dd_check_in(mcdc): + mcdc["domain_decomp"]["send_count"] = 0 + mcdc["domain_decomp"]["recv_count"] = 0 + mcdc["domain_decomp"]["send_total"] = 0 + mcdc["domain_decomp"]["rank_busy"] = True + + with objmode(rank="int64", total="int64"): + rank = MPI.COMM_WORLD.Get_rank() + total = MPI.COMM_WORLD.Get_size() + + if rank == 0: + mcdc["domain_decomp"]["busy_total"] = total + else: + mcdc["domain_decomp"]["busy_total"] = 0 + + +@njit +def dd_check_out(mcdc): + with objmode(): + rank = MPI.COMM_WORLD.Get_rank() + send_count = mcdc["domain_decomp"]["send_count"] + recv_count = mcdc["domain_decomp"]["recv_count"] + send_total = mcdc["domain_decomp"]["send_total"] + busy_total = mcdc["domain_decomp"]["busy_total"] + rank_busy = mcdc["domain_decomp"]["rank_busy"] + + if send_count != 0: + print( + f"Domain decomposed loop closed out with non-zero send count {send_count} in rank {rank}" + ) + mcdc["domain_decomp"]["send_count"] = 0 + + if recv_count != 0: + print( + f"Domain decomposed loop closed out with non-zero recv count {recv_count} in rank {rank}" + ) + mcdc["domain_decomp"]["recv_count"] = 0 + + if send_total != 0: + print( + f"Domain decomposed loop closed out with non-zero send total {send_total} in rank {rank}" + ) + mcdc["domain_decomp"]["send_total"] = 0 + + if busy_total != 0: + print( + f"Domain decomposed 
loop closed out with non-zero busy total {busy_total} in rank {rank}" + ) + mcdc["domain_decomp"]["busy_total"] = 0 + + if rank_busy: + print( + f"Domain decomposed loop closed out with rank {rank} still marked as busy" + ) + mcdc["domain_decomp"]["rank_busy"] = 0 + + clear_requests() + + +@njit +def dd_signal_halt(mcdc): + + with objmode(): + for rank in range(1, MPI.COMM_WORLD.Get_size()): + dummy_buff = np.zeros((1,), dtype=np.int32) + MPI.COMM_WORLD.Send(dummy_buff, dest=rank, tag=3) + + mcdc["domain_decomp"]["work_done"] = True + + +@njit +def dd_signal_block(mcdc): + + with objmode(rank="int64"): + rank = MPI.COMM_WORLD.Get_rank() + + send_delta = ( + mcdc["domain_decomp"]["send_count"] - mcdc["domain_decomp"]["recv_count"] + ) + if rank == 0: + mcdc["domain_decomp"]["send_total"] += send_delta + mcdc["domain_decomp"]["busy_total"] -= 1 + else: + with objmode(): + buff = np.zeros((1,), dtype=type_.dd_turnstile_event) + buff[0]["busy_delta"] = -1 + buff[0]["send_delta"] = send_delta + req = MPI.COMM_WORLD.Isend( + [buff, type_.dd_turnstile_event_mpi], dest=0, tag=2 + ) + save_request((req, buff)) + + mcdc["domain_decomp"]["send_count"] = 0 + mcdc["domain_decomp"]["recv_count"] = 0 + + if ( + (rank == 0) + and (mcdc["domain_decomp"]["busy_total"] == 0) + and (mcdc["domain_decomp"]["send_total"] == 0) + ): + dd_signal_halt(mcdc) + + +@njit +def dd_signal_unblock(mcdc): + + with objmode(rank="int64"): + rank = MPI.COMM_WORLD.Get_rank() + + send_delta = ( + mcdc["domain_decomp"]["send_count"] - mcdc["domain_decomp"]["recv_count"] + ) + + if rank == 0: + mcdc["domain_decomp"]["send_total"] += send_delta + mcdc["domain_decomp"]["busy_total"] += 1 + if (mcdc["domain_decomp"]["busy_total"] == 0) and ( + mcdc["domain_decomp"]["send_total"] == 0 + ): + dd_signal_halt(mcdc) + else: + with objmode(): + buff = np.zeros((1,), dtype=type_.dd_turnstile_event) + buff[0]["busy_delta"] = 1 + buff[0]["send_delta"] = send_delta + req = MPI.COMM_WORLD.Isend( + [buff, 
type_.dd_turnstile_event_mpi], dest=0, tag=2 + ) + save_request((req, buff)) + mcdc["domain_decomp"]["send_count"] = 0 + mcdc["domain_decomp"]["recv_count"] = 0 + + +@njit +def dd_distribute_bank(mcdc, bank, dest_list): + + with objmode(send_delta="int64"): + dest_count = len(dest_list) + send_delta = 0 + for i, dest in enumerate(dest_list): + size = get_bank_size(bank) + ratio = int(size / dest_count) + start = ratio * i + end = start + ratio + if i == dest_count - 1: + end = size + sub_bank = np.array(bank["particles"][start:end]) + if sub_bank.shape[0] > 0: + req = MPI.COMM_WORLD.Isend( + [sub_bank, type_.particle_record_mpi], dest=dest, tag=1 + ) + save_request((req, sub_bank)) + send_delta += end - start + + mcdc["domain_decomp"]["send_count"] += send_delta + set_bank_size(bank, 0) + + +@njit +def dd_particle_send(mcdc): + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_xp"], mcdc["technique"]["dd_xp_neigh"] + ) + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_xn"], mcdc["technique"]["dd_xn_neigh"] + ) + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_yp"], mcdc["technique"]["dd_yp_neigh"] + ) + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_yn"], mcdc["technique"]["dd_yn_neigh"] + ) + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_zp"], mcdc["technique"]["dd_zp_neigh"] + ) + dd_distribute_bank( + mcdc, mcdc["domain_decomp"]["bank_zn"], mcdc["technique"]["dd_zn_neigh"] + ) + + +# ============================================================================= +# Receive particles and clear banks +# ============================================================================= + + +@njit +def dd_get_recv_tag(): + + with objmode(tag="int64"): + status = MPI.Status() + MPI.COMM_WORLD.Probe(status=status) + tag = status.Get_tag() + + return tag + + +@njit +def dd_recv_particles(mcdc): + + buff = np.zeros( + mcdc["domain_decomp"]["bank_zp"]["particles"].shape[0], + dtype=type_.particle_record, + ) + + with 
objmode(size="int64"): + status = MPI.Status() + MPI.COMM_WORLD.Recv([buff, type_.particle_record_mpi], status=status) + size = status.Get_count(type_.particle_record_mpi) + rank = MPI.COMM_WORLD.Get_rank() + + mcdc["domain_decomp"]["recv_count"] += size + + # Set source bank from buffer + for i in range(size): + add_particle(buff[i], mcdc["bank_active"]) + + if ( + mcdc["domain_decomp"]["recv_count"] > 0 + and not mcdc["domain_decomp"]["rank_busy"] + ): + dd_signal_unblock(mcdc) + mcdc["domain_decomp"]["rank_busy"] = True + + +@njit +def dd_recv_turnstile(mcdc): + + with objmode(busy_delta="int64", send_delta="int64"): + event_buff = np.zeros((1,), dtype=type_.dd_turnstile_event) + MPI.COMM_WORLD.Recv([event_buff, type_.dd_turnstile_event_mpi]) + busy_delta = event_buff[0]["busy_delta"] + send_delta = event_buff[0]["send_delta"] + rank = MPI.COMM_WORLD.Get_rank() + busy_total = mcdc["domain_decomp"]["busy_total"] + send_total = mcdc["domain_decomp"]["send_total"] + + mcdc["domain_decomp"]["busy_total"] += busy_delta + mcdc["domain_decomp"]["send_total"] += send_delta + + if (mcdc["domain_decomp"]["busy_total"] == 0) and ( + mcdc["domain_decomp"]["send_total"] == 0 + ): + dd_signal_halt(mcdc) + + +@njit +def dd_recv_halt(mcdc): + + with objmode(): + dummy_buff = np.zeros((1,), dtype=np.int32) + MPI.COMM_WORLD.Recv(dummy_buff) + work_done = 1 + rank = MPI.COMM_WORLD.Get_rank() + + mcdc["domain_decomp"]["work_done"] = True + + +@njit +def dd_recv(mcdc): + + if mcdc["domain_decomp"]["rank_busy"]: + dd_signal_block(mcdc) + mcdc["domain_decomp"]["rank_busy"] = False + + if not mcdc["domain_decomp"]["work_done"]: + tag = dd_get_recv_tag() + + if tag == 1: + dd_recv_particles(mcdc) + elif tag == 2: + dd_recv_turnstile(mcdc) + elif tag == 3: + dd_recv_halt(mcdc) + + +# ============================================================================= +# Particle in domain +# ============================================================================= + + +# Check if particle 
is in domain +@njit +def particle_in_domain(P, mcdc): + d_idx = mcdc["dd_idx"] + d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1 + d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1 + d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1 + + d_iz = int(d_idx / (d_Nx * d_Ny)) + d_iy = int((d_idx - d_Nx * d_Ny * d_iz) / d_Nx) + d_ix = int(d_idx - d_Nx * d_Ny * d_iz - d_Nx * d_iy) + + x_cell = binary_search(P["x"], mcdc["technique"]["dd_mesh"]["x"]) + y_cell = binary_search(P["y"], mcdc["technique"]["dd_mesh"]["y"]) + z_cell = binary_search(P["z"], mcdc["technique"]["dd_mesh"]["z"]) + + if d_ix == x_cell: + if d_iy == y_cell: + if d_iz == z_cell: + return True + return False + + +# ============================================================================= +# Source in domain +# ============================================================================= + + +# Check for source in domain +@njit +def source_in_domain(source, domain_mesh, d_idx): + d_Nx = domain_mesh["x"].size - 1 + d_Ny = domain_mesh["y"].size - 1 + d_Nz = domain_mesh["z"].size - 1 + + d_iz = int(d_idx / (d_Nx * d_Ny)) + d_iy = int((d_idx - d_Nx * d_Ny * d_iz) / d_Nx) + d_ix = int(d_idx - d_Nx * d_Ny * d_iz - d_Nx * d_iy) + + d_x = [domain_mesh["x"][d_ix], domain_mesh["x"][d_ix + 1]] + d_y = [domain_mesh["y"][d_iy], domain_mesh["y"][d_iy + 1]] + d_z = [domain_mesh["z"][d_iz], domain_mesh["z"][d_iz + 1]] + + if ( + d_x[0] <= source["box_x"][0] <= d_x[1] + or d_x[0] <= source["box_x"][1] <= d_x[1] + or (source["box_x"][0] < d_x[0] and source["box_x"][1] > d_x[1]) + ): + if ( + d_y[0] <= source["box_y"][0] <= d_y[1] + or d_y[0] <= source["box_y"][1] <= d_y[1] + or (source["box_y"][0] < d_y[0] and source["box_y"][1] > d_y[1]) + ): + if ( + d_z[0] <= source["box_z"][0] <= d_z[1] + or d_z[0] <= source["box_z"][1] <= d_z[1] + or (source["box_z"][0] < d_z[0] and source["box_z"][1] > d_z[1]) + ): + return True + else: + return False + else: + return False + else: + return False + + +# 
============================================================================= +# Compute domain load +# ============================================================================= + + +@njit +def domain_work(mcdc, domain, N): + domain_mesh = mcdc["technique"]["dd_mesh"] + + d_Nx = domain_mesh["x"].size - 1 + d_Ny = domain_mesh["y"].size - 1 + d_Nz = domain_mesh["z"].size - 1 + work_start = 0 + for d_idx in range(domain): + d_iz = int(d_idx / (d_Nx * d_Ny)) + d_iy = int((d_idx - d_Nx * d_Ny * d_iz) / d_Nx) + d_ix = int(d_idx - d_Nx * d_Ny * d_iz - d_Nx * d_iy) + + d_x = [domain_mesh["x"][d_ix], domain_mesh["x"][d_ix + 1]] + d_y = [domain_mesh["y"][d_iy], domain_mesh["y"][d_iy + 1]] + d_z = [domain_mesh["z"][d_iz], domain_mesh["z"][d_iz + 1]] + # Compute volumes of sources and numbers of particles + + Psum = 0 + + Nm = 0 + num_source = 0 + for source in mcdc["sources"]: + Psum += source["prob"] + num_source += 1 + Vi = np.zeros(num_source) + Vim = np.zeros(num_source) + Ni = np.zeros(num_source) + i = 0 + for source in mcdc["sources"]: + Ni[i] = N * source["prob"] / Psum + Vi[i] = 1 + Vim[i] = 1 + if source["box"] == True: + xV = source["box_x"][1] - source["box_x"][0] + if xV != 0: + Vi[i] *= xV + Vim[i] *= min(source["box_x"][1], d_x[1]) - max( + source["box_x"][0], d_x[0] + ) + yV = source["box_y"][1] - source["box_y"][0] + if yV != 0: + Vi[i] *= yV + Vim[i] *= min(source["box_y"][1], d_y[1]) - max( + source["box_y"][0], d_y[0] + ) + zV = source["box_z"][1] - source["box_z"][0] + if zV != 0: + Vi[i] *= zV + Vim[i] *= min(source["box_z"][1], d_z[1]) - max( + source["box_z"][0], d_z[0] + ) + if not source_in_domain(source, domain_mesh, d_idx): + Vim[i] = 0 + i += 1 + for source in range(num_source): + Nm += Ni[source] * Vim[source] / Vi[source] + work_start += Nm + d_idx = domain + d_iz = int(mcdc["dd_idx"] / (d_Nx * d_Ny)) + d_iy = int((mcdc["dd_idx"] - d_Nx * d_Ny * d_iz) / d_Nx) + d_ix = int(mcdc["dd_idx"] - d_Nx * d_Ny * d_iz - d_Nx * d_iy) + + d_x = 
[domain_mesh["x"][d_ix], domain_mesh["x"][d_ix + 1]] + d_y = [domain_mesh["y"][d_iy], domain_mesh["y"][d_iy + 1]] + d_z = [domain_mesh["z"][d_iz], domain_mesh["z"][d_iz + 1]] + # Compute volumes of sources and numbers of particles + num_source = len(mcdc["sources"]) + Vi = np.zeros(num_source) + Vim = np.zeros(num_source) + Ni = np.zeros(num_source) + Psum = 0 + + Nm = 0 + for source in mcdc["sources"]: + Psum += source["prob"] + i = 0 + for source in mcdc["sources"]: + Ni[i] = N * source["prob"] / Psum + Vi[i] = 1 + Vim[i] = 1 + + if source["box"] == True: + xV = source["box_x"][1] - source["box_x"][0] + if xV != 0: + Vi[i] *= xV + Vim[i] *= min(source["box_x"][1], d_x[1]) - max( + source["box_x"][0], d_x[0] + ) + yV = source["box_y"][1] - source["box_y"][0] + if yV != 0: + Vi[i] *= yV + Vim[i] *= min(source["box_y"][1], d_y[1]) - max( + source["box_y"][0], d_y[0] + ) + zV = source["box_z"][1] - source["box_z"][0] + if zV != 0: + Vi[i] *= zV + Vim[i] *= min(source["box_z"][1], d_z[1]) - max( + source["box_z"][0], d_z[0] + ) + i += 1 + for source in range(num_source): + Nm += Ni[source] * Vim[source] / Vi[source] + Nm /= mcdc["technique"]["dd_work_ratio"][domain] + rank = mcdc["mpi_rank"] + if mcdc["technique"]["dd_work_ratio"][domain] > 1: + work_start += Nm * (rank - np.sum(mcdc["technique"]["dd_work_ratio"][0:d_idx])) + total_v = 0 + for source in range(len(mcdc["sources"])): + total_v += Vim[source] + i = 0 + for source in mcdc["sources"]: + if total_v != 0: + source["prob"] *= 2 * Vim[i] / total_v + i += 1 + return (int(Nm), int(work_start)) + + +# ============================================================================= +# Source particle in domain only +# ============================================================================= + + +@njit() +def source_particle_dd(seed, mcdc): + domain_mesh = mcdc["technique"]["dd_mesh"] + d_idx = mcdc["dd_idx"] + + d_Nx = domain_mesh["x"].size - 1 + d_Ny = domain_mesh["y"].size - 1 + d_Nz = domain_mesh["z"].size - 1 
+ + d_iz = int(mcdc["dd_idx"] / (d_Nx * d_Ny)) + d_iy = int((mcdc["dd_idx"] - d_Nx * d_Ny * d_iz) / d_Nx) + d_ix = int(mcdc["dd_idx"] - d_Nx * d_Ny * d_iz - d_Nx * d_iy) + + d_x = [domain_mesh["x"][d_ix], domain_mesh["x"][d_ix + 1]] + d_y = [domain_mesh["y"][d_iy], domain_mesh["y"][d_iy + 1]] + d_z = [domain_mesh["z"][d_iz], domain_mesh["z"][d_iz + 1]] + + P = np.zeros(1, dtype=type_.particle_record)[0] + + P["rng_seed"] = seed + # Sample source + xi = rng(P) + tot = 0.0 + for source in mcdc["sources"]: + if source_in_domain(source, domain_mesh, d_idx): + tot += source["prob"] + if tot >= xi: + break + + # Position + if source["box"]: + x = sample_uniform( + max(source["box_x"][0], d_x[0]), min(source["box_x"][1], d_x[1]), P + ) + y = sample_uniform( + max(source["box_y"][0], d_y[0]), min(source["box_y"][1], d_y[1]), P + ) + z = sample_uniform( + max(source["box_z"][0], d_z[0]), min(source["box_z"][1], d_z[1]), P + ) + + else: + x = source["x"] + y = source["y"] + z = source["z"] + + # Direction + if source["isotropic"]: + ux, uy, uz = sample_isotropic_direction(P) + elif source["white"]: + ux, uy, uz = sample_white_direction( + source["white_x"], source["white_y"], source["white_z"], P + ) + else: + ux = source["ux"] + uy = source["uy"] + uz = source["uz"] + + # Energy and time + g = sample_discrete(source["group"], P) + t = sample_uniform(source["time"][0], source["time"][1], P) + + # Make and return particle + P["x"] = x + P["y"] = y + P["z"] = z + P["t"] = t + P["ux"] = ux + P["uy"] = uy + P["uz"] = uz + P["g"] = g + P["w"] = 1 + P["sensitivity_ID"] = 0 + return P + + +@njit +def distribute_work_dd(N, mcdc, precursor=False): + # Total # of work + work_size_total = N + + if not mcdc["technique"]["dd_repro"]: + work_size, work_start = domain_work(mcdc, mcdc["dd_idx"], N) + else: + work_start = 0 + work_size = work_size_total + + if not precursor: + mcdc["mpi_work_start"] = work_start + mcdc["mpi_work_size"] = work_size + mcdc["mpi_work_size_total"] = 
work_size_total + else: + mcdc["mpi_work_start_precursor"] = work_start + mcdc["mpi_work_size_precursor"] = work_size + mcdc["mpi_work_size_total_precursor"] = work_size_total # ============================================================================= @@ -98,12 +765,12 @@ def sample_piecewise_linear(cdf, P): # ============================================================================= -@njit(numba.uint64(numba.uint64, numba.uint64)) +@njit def wrapping_mul(a, b): return a * b -@njit(numba.uint64(numba.uint64, numba.uint64)) +@njit def wrapping_add(a, b): return a + b @@ -129,7 +796,7 @@ def adapt_rng(object_mode=False): wrapping_mul = wrapping_mul_python -@njit(numba.uint64(numba.uint64, numba.uint64)) +@njit def split_seed(key, seed): """murmur_hash64a""" multiplier = numba.uint64(0xC6A4A7935BD1E995) @@ -152,8 +819,9 @@ def split_seed(key, seed): return hash_value -@njit(numba.uint64(numba.uint64)) +@njit def rng_(seed): + seed = numba.uint64(seed) return wrapping_add(wrapping_mul(RNG_G, seed), RNG_C) & RNG_MOD_MASK @@ -185,7 +853,7 @@ def rng_array(seed, shape, size): @njit def source_particle(seed, mcdc): - P = np.zeros(1, dtype=type_.particle_record)[0] + P: type_.particle_record = adapt.local_particle_record() P["rng_seed"] = seed # Sample source @@ -251,35 +919,58 @@ def source_particle(seed, mcdc): # ============================================================================= +@njit +def get_bank_size(bank): + return bank["size"][0] + + +@njit +def set_bank_size(bank, value): + bank["size"][0] = value + + +@njit +def add_bank_size(bank, value): + return adapt.global_add(bank["size"], 0, value) + + +@for_cpu() +def full_bank_print(bank): + with objmode(): + print_error("Particle %s bank is full." 
% bank["tag"]) + + +@for_gpu() +def full_bank_print(bank): + pass + + @njit def add_particle(P, bank): + + idx = add_bank_size(bank, 1) + # Check if bank is full - if bank["size"] == bank["particles"].shape[0]: - with objmode(): - print_error("Particle %s bank is full." % bank["tag"]) + if idx >= bank["particles"].shape[0]: + full_bank_print(bank) # Set particle - bank["particles"][bank["size"]] = P - - # Increment size - bank["size"] += 1 + copy_recordlike(bank["particles"][idx], P) @njit -def get_particle(bank, mcdc): - # Check if bank is empty - if bank["size"] == 0: - with objmode(): - print_error("Particle %s bank is empty." % bank["tag"]) +def get_particle(P, bank, mcdc): - # Decrement size - bank["size"] -= 1 + idx = add_bank_size(bank, -1) - 1 - # Create in-flight particle - P = np.zeros(1, dtype=type_.particle)[0] + # Check if bank is empty + if idx < 0: + return False + # with objmode(): + # print_error("Particle %s bank is empty." % bank["tag"]) # Set attribute - P_rec = bank["particles"][bank["size"]] + P_rec = bank["particles"][idx] P["x"] = P_rec["x"] P["y"] = P_rec["y"] P["z"] = P_rec["z"] @@ -303,7 +994,7 @@ def get_particle(bank, mcdc): P["cell_ID"] = -1 P["surface_ID"] = -1 P["event"] = -1 - return P + return True @njit @@ -322,17 +1013,18 @@ def manage_particle_banks(seed, mcdc): population_control(seed, mcdc) else: # Swap census and source bank - size = mcdc["bank_census"]["size"] - mcdc["bank_source"]["size"] = size + size = get_bank_size(mcdc["bank_census"]) + set_bank_size(mcdc["bank_source"], size) mcdc["bank_source"]["particles"][:size] = mcdc["bank_census"]["particles"][ :size ] # MPI rebalance - bank_rebalance(mcdc) + if not mcdc["technique"]["domain_decomposition"]: + bank_rebalance(mcdc) # Zero out census bank - mcdc["bank_census"]["size"] = 0 + set_bank_size(mcdc["bank_census"], 0) # Manage IC bank if mcdc["technique"]["IC_generator"] and mcdc["cycle_active"]: @@ -359,8 +1051,8 @@ def manage_IC_bank(mcdc): with objmode(Nn="int64", 
Np="int64"): # Create MPI-supported numpy object - Nn = mcdc["technique"]["IC_bank_neutron_local"]["size"] - Np = mcdc["technique"]["IC_bank_precursor_local"]["size"] + Nn = get_bank_size(mcdc["technique"]["IC_bank_neutron_local"]) + Np = get_bank_size(mcdc["technique"]["IC_bank_precursor_local"]) neutrons = MPI.COMM_WORLD.gather( mcdc["technique"]["IC_bank_neutron_local"]["particles"][:Nn] @@ -383,10 +1075,8 @@ def manage_IC_bank(mcdc): # Set global bank from buffer if mcdc["mpi_master"]: - start_n = mcdc["technique"]["IC_bank_neutron"]["size"] - start_p = mcdc["technique"]["IC_bank_precursor"]["size"] - mcdc["technique"]["IC_bank_neutron"]["size"] += Nn - mcdc["technique"]["IC_bank_precursor"]["size"] += Np + start_n = add_bank_size(mcdc["technique"]["IC_bank_neutron"], Nn) + start_p = add_bank_size(mcdc["technique"]["IC_bank_precursor"], Np) for i in range(Nn): mcdc["technique"]["IC_bank_neutron"]["particles"][start_n + i] = buff_n[i] for i in range(Np): @@ -395,13 +1085,13 @@ def manage_IC_bank(mcdc): ] # Reset local banks - mcdc["technique"]["IC_bank_neutron_local"]["size"] = 0 - mcdc["technique"]["IC_bank_precursor_local"]["size"] = 0 + set_bank_size(mcdc["technique"]["IC_bank_neutron_local"], 0) + set_bank_size(mcdc["technique"]["IC_bank_precursor_local"], 0) @njit def bank_scanning(bank, mcdc): - N_local = bank["size"] + N_local = get_bank_size(bank) # Starting index buff = np.zeros(1, dtype=np.int64) @@ -421,7 +1111,7 @@ def bank_scanning(bank, mcdc): @njit def bank_scanning_weight(bank, mcdc): # Local weight CDF - N_local = bank["size"] + N_local = get_bank_size(bank) w_cdf = np.zeros(N_local + 1) for i in range(N_local): w_cdf[i + 1] = w_cdf[i] + bank["particles"][i]["w"] @@ -445,7 +1135,7 @@ def bank_scanning_weight(bank, mcdc): @njit def bank_scanning_DNP(bank, mcdc): - N_DNP_local = bank["size"] + N_DNP_local = get_bank_size(bank) # Get sum of ceil-ed local DNP weights N_local = 0 @@ -482,7 +1172,7 @@ def normalize_weight(bank, norm): def 
total_weight(bank): # Local total weight W_local = np.zeros(1) - for i in range(bank["size"]): + for i in range(get_bank_size(bank)): W_local[0] += bank["particles"][i]["w"] # MPI Allreduce @@ -492,6 +1182,14 @@ def total_weight(bank): return buff[0] +@njit +def allreduce(value): + total = np.zeros(1, np.float64) + with objmode(): + MPI.COMM_WORLD.Allreduce(np.array([value], np.float64), total, MPI.SUM) + return total[0] + + @njit def bank_rebalance(mcdc): # Scan the bank @@ -527,7 +1225,7 @@ def bank_rebalance(mcdc): with objmode(size="int64"): # Create MPI-supported numpy object - size = mcdc["bank_source"]["size"] + size = get_bank_size(mcdc["bank_source"]) bank = np.array(mcdc["bank_source"]["particles"][:size]) # If offside, need to receive first @@ -568,7 +1266,7 @@ def bank_rebalance(mcdc): buff[i] = bank[i] # Set source bank from buffer - mcdc["bank_source"]["size"] = size + set_bank_size(mcdc["bank_source"], size) for i in range(size): mcdc["bank_source"]["particles"][i] = buff[i] @@ -612,8 +1310,33 @@ def distribute_work(N, mcdc, precursor=False): # ============================================================================= +@for_cpu() +def pn_over_one(): + with objmode(): + print_error("Pn > 1.0.") + + +@for_gpu() +def pn_over_one(): + pass + + +@for_cpu() +def pp_over_one(): + with objmode(): + print_error("Pp > 1.0.") + + +@for_gpu() +def pp_over_one(): + pass + + @njit -def bank_IC(P, mcdc): +def bank_IC(P, prog): + + mcdc = adapt.device(prog) + # TODO: Consider multi-nuclide material material = mcdc["nuclides"][P["material_ID"]] @@ -640,19 +1363,19 @@ def bank_IC(P, mcdc): # TODO: Splitting for Pn > 1.0 if Pn > 1.0: - with objmode(): - print_error("Pn > 1.0.") + pn_over_one() # Sample particle if rng(P) < Pn: P_new = split_particle(P) P_new["w"] = 1.0 P_new["t"] = 0.0 - add_particle(P_new, mcdc["technique"]["IC_bank_neutron_local"]) + adapt.add_IC(P_new, prog) # Accumulate fission SigmaF = material["fission"][g] - 
mcdc["technique"]["IC_fission_score"] += v * SigmaF + # mcdc["technique"]["IC_fission_score"][0] += v * SigmaF + adapt.global_add(mcdc["technique"]["IC_fission_score"], 0, v * SigmaF) # ========================================================================= # Precursor @@ -686,18 +1409,16 @@ def bank_IC(P, mcdc): # TODO: Splitting for Pp > 1.0 if Pp > 1.0: - with objmode(): - print_error("Pp > 1.0.") + pp_over_one() # Sample precursor if rng(P) < Pp: - idx = mcdc["technique"]["IC_bank_precursor_local"]["size"] + idx = add_bank_size(mcdc["technique"]["IC_bank_precursor_local"], 1) precursor = mcdc["technique"]["IC_bank_precursor_local"]["precursors"][idx] precursor["x"] = P["x"] precursor["y"] = P["y"] precursor["z"] = P["z"] precursor["w"] = wp_prime / wn_prime - mcdc["technique"]["IC_bank_precursor_local"]["size"] += 1 # Sample group xi = rng(P) * total @@ -716,7 +1437,7 @@ def bank_IC(P, mcdc): # Population control techniques # ============================================================================= # TODO: Make it a stand-alone function that takes (bank_init, bank_final, M). -# The challenge is in the use of type-dependent copy_particle which is +# The challenge is in the use of type-dependent copy_record which is # required due to pure-Python behavior of taking things by reference. 
@@ -754,14 +1475,14 @@ def pct_combing(seed, mcdc): tooth_end = math.floor((idx_end - offset) / td) + 1 # Locally sample particles from census bank - bank_source["size"] = 0 + set_bank_size(bank_source, 0) for i in range(tooth_start, tooth_end): tooth = i * td + offset idx = math.floor(tooth) - idx_start - P = copy_particle(bank_census["particles"][idx]) + P = copy_record(bank_census["particles"][idx]) # Set weight P["w"] *= td - add_particle(P, bank_source) + adapt.add_source(P, mcdc) @njit @@ -791,15 +1512,15 @@ def pct_combing_weight(seed, mcdc): tooth_end = math.floor((w_end - offset) / td) + 1 # Locally sample particles from census bank - bank_source["size"] = 0 + set_bank_size(bank_source, 0) idx = 0 for i in range(tooth_start, tooth_end): tooth = i * td + offset idx += binary_search(tooth, w_cdf[idx:]) - P = copy_particle(bank_census["particles"][idx]) + P = copy_record(bank_census["particles"][idx]) # Set weight P["w"] = td - add_particle(P, bank_source) + adapt.add_source(P, mcdc) # ============================================================================= @@ -832,6 +1553,17 @@ def shift_particle(P, shift): P["t"] += shift +@for_cpu() +def lost_particle(P): + with objmode(): + print("A particle is lost at (", P["x"], P["y"], P["z"], ")") + + +@for_gpu() +def lost_particle(P): + pass + + @njit def get_particle_cell(P, universe_ID, trans, mcdc): """ @@ -844,9 +1576,8 @@ def get_particle_cell(P, universe_ID, trans, mcdc): if cell_check(P, cell, trans, mcdc): return cell["ID"] + lost_particle(P) # Particle is not found - with objmode(): - print("A particle is lost at (", P["x"], P["y"], P["z"], ")") P["alive"] = False return -1 @@ -854,7 +1585,8 @@ def get_particle_cell(P, universe_ID, trans, mcdc): @njit def get_particle_material(P, mcdc): # Translation accumulator - trans = np.zeros(3) + trans_struct = adapt.local_translate() + trans = trans_struct["values"] # Top level cell cell = mcdc["cells"][P["cell_ID"]] @@ -867,7 +1599,8 @@ def 
get_particle_material(P, mcdc): lattice = mcdc["lattices"][cell["lattice_ID"]] # Get lattice center for translation) - trans -= cell["lattice_center"] + for i in range(3): + trans[i] -= cell["lattice_center"][i] # Get universe mesh = lattice["mesh"] @@ -899,8 +1632,7 @@ def get_particle_speed(P, mcdc): @njit -def copy_particle(P): - P_new = np.zeros(1, dtype=type_.particle_record)[0] +def copy_recordlike(P_new, P): P_new["x"] = P["x"] P_new["y"] = P["y"] P_new["z"] = P["z"] @@ -913,12 +1645,57 @@ def copy_particle(P): P_new["w"] = P["w"] P_new["rng_seed"] = P["rng_seed"] P_new["sensitivity_ID"] = P["sensitivity_ID"] + P_new["iqmc"]["w"] = P["iqmc"]["w"] + copy_track_data(P_new, P) + + +@njit +def copy_record(P): + P_new = adapt.local_particle_record() + copy_recordlike(P_new, P) return P_new +@njit +def recordlike_to_particle(P_rec): + P_new = adapt.local_particle() + copy_recordlike(P_new, P_rec) + P_new["fresh"] = True + P_new["alive"] = True + P_new["material_ID"] = -1 + P_new["cell_ID"] = -1 + P_new["surface_ID"] = -1 + P_new["event"] = -1 + return P_new + + +@njit +def copy_particle(P_new, P): + P_new["x"] = P["x"] + P_new["y"] = P["y"] + P_new["z"] = P["z"] + P_new["t"] = P["t"] + P_new["ux"] = P["ux"] + P_new["uy"] = P["uy"] + P_new["uz"] = P["uz"] + P_new["g"] = P["g"] + P_new["w"] = P["w"] + P_new["alive"] = P["alive"] + P_new["fresh"] = P["fresh"] + P_new["material_ID"] = P["material_ID"] + P_new["cell_ID"] = P["cell_ID"] + P_new["surface_ID"] = P["surface_ID"] + P_new["translation"] = P["translation"] + P_new["event"] = P["event"] + P_new["sensitivity_ID"] = P["sensitivity_ID"] + P_new["rng_seed"] = P["rng_seed"] + P_new["iqmc"]["w"] = P["iqmc"]["w"] + copy_track_data(P_new, P) + + @njit def split_particle(P): - P_new = copy_particle(P) + P_new = copy_record(P) P_new["rng_seed"] = split_seed(P["rng_seed"], SEED_SPLIT_PARTICLE) rng(P) return P_new @@ -1110,6 +1887,7 @@ def surface_distance(P, surface, trans, mcdc): t_max = surface["t"][idx + 1] d_max = 
(t_max - P["t"]) * v + div = G * ux + H * uy + I_ * uz + J1 / v distance = -surface_evaluate(P, surface, trans) / ( G * ux + H * uy + I_ * uz + J1 / v ) @@ -1279,12 +2057,38 @@ def mesh_uniform_get_index(P, mesh, trans): Px = P["x"] + trans[0] Py = P["y"] + trans[1] Pz = P["z"] + trans[2] - x = math.floor((Px - mesh["x0"]) / mesh["dx"]) - y = math.floor((Py - mesh["y0"]) / mesh["dy"]) - z = math.floor((Pz - mesh["z0"]) / mesh["dz"]) + x = numba.int64(math.floor((Px - mesh["x0"]) / mesh["dx"])) + y = numba.int64(math.floor((Py - mesh["y0"]) / mesh["dy"])) + z = numba.int64(math.floor((Pz - mesh["z0"]) / mesh["dz"])) return x, y, z +@njit +def mesh_crossing_evaluate(P, mesh): + # Shift backward + shift_particle(P, -2 * SHIFT) + t1, x1, y1, z1, outside1 = mesh_get_index(P, mesh) + + # Double shift forward + shift_particle(P, 4 * SHIFT) + t2, x2, y2, z2, outside2 = mesh_get_index(P, mesh) + + # Return particle to initial position + shift_particle(P, -2 * SHIFT) + + # Determine dimension crossed + directions = [] + + if x1 != x2: + directions.append(MESH_X) + if y1 != y2: + directions.append(MESH_Y) + if z1 != z2: + directions.append(MESH_Z) + + return x1, y1, z1, t1, directions + + # ============================================================================= # Tally operations # ============================================================================= @@ -1344,14 +2148,18 @@ def score_exit(P, x, mcdc): @njit def score_flux(s, g, t, x, y, z, mu, azi, flux, score): - score["bin"][s, g, t, x, y, z, mu, azi] += flux + # score["bin"][s, g, t, x, y, z, mu, azi] += flux + adapt.global_add(score["bin"], (s, g, t, x, y, z, mu, azi), flux) @njit def score_current(s, g, t, x, y, z, flux, P, score): - score["bin"][s, g, t, x, y, z, 0] += flux * P["ux"] - score["bin"][s, g, t, x, y, z, 1] += flux * P["uy"] - score["bin"][s, g, t, x, y, z, 2] += flux * P["uz"] + # score["bin"][s, g, t, x, y, z, 0] += flux * P["ux"] + # score["bin"][s, g, t, x, y, z, 1] += flux * P["uy"] + # 
score["bin"][s, g, t, x, y, z, 2] += flux * P["uz"] + adapt.global_add(score["bin"], (s, g, t, x, y, z, 0), flux * P["ux"]) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 1), flux * P["uy"]) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 2), flux * P["uz"]) @njit @@ -1359,12 +2167,18 @@ def score_eddington(s, g, t, x, y, z, flux, P, score): ux = P["ux"] uy = P["uy"] uz = P["uz"] - score["bin"][s, g, t, x, y, z, 0] += flux * ux * ux - score["bin"][s, g, t, x, y, z, 1] += flux * ux * uy - score["bin"][s, g, t, x, y, z, 2] += flux * ux * uz - score["bin"][s, g, t, x, y, z, 3] += flux * uy * uy - score["bin"][s, g, t, x, y, z, 4] += flux * uy * uz - score["bin"][s, g, t, x, y, z, 5] += flux * uz * uz + # score["bin"][s, g, t, x, y, z, 0] += flux * ux * ux + # score["bin"][s, g, t, x, y, z, 1] += flux * ux * uy + # score["bin"][s, g, t, x, y, z, 2] += flux * ux * uz + # score["bin"][s, g, t, x, y, z, 3] += flux * uy * uy + # score["bin"][s, g, t, x, y, z, 4] += flux * uy * uz + # score["bin"][s, g, t, x, y, z, 5] += flux * uz * uz + adapt.global_add(score["bin"], (s, g, t, x, y, z, 0), flux * ux * ux) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 1), flux * ux * uy) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 2), flux * ux * uz) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 3), flux * uy * uy) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 4), flux * uy * uz) + adapt.global_add(score["bin"], (s, g, t, x, y, z, 5), flux * uz * uz) @njit @@ -1458,13 +2272,15 @@ def eigenvalue_tally(P, distance, mcdc): nuSigmaF = get_MacroXS(XS_NU_FISSION, material, P, mcdc) # Fission production (needed even during inactive cycle) - mcdc["eigenvalue_tally_nuSigmaF"] += flux * nuSigmaF + # mcdc["eigenvalue_tally_nuSigmaF"][0] += flux * nuSigmaF + adapt.global_add(mcdc["eigenvalue_tally_nuSigmaF"], 0, flux * nuSigmaF) if mcdc["cycle_active"]: # Neutron density v = get_particle_speed(P, mcdc) n_density = flux / v - mcdc["eigenvalue_tally_n"] += n_density + # 
mcdc["eigenvalue_tally_n"][0] += n_density + adapt.global_add(mcdc["eigenvalue_tally_n"], 0, n_density) # Maximum neutron density if mcdc["n_max"] < n_density: mcdc["n_max"] = n_density @@ -1489,11 +2305,12 @@ def eigenvalue_tally(P, distance, mcdc): ID_nuclide = material["nuclide_IDs"][i] nuclide = mcdc["nuclides"][ID_nuclide] for j in range(J): - nu_d = get_nu(NU_FISSION_DELAYED, nuclide, E, j) + nu_d = get_nu_group(NU_FISSION_DELAYED, nuclide, E, j) decay = nuclide["ce_decay"][j] total += nu_d / decay C_density = flux * total * SigmaF / mcdc["k_eff"] - mcdc["eigenvalue_tally_C"] += C_density + # mcdc["eigenvalue_tally_C"][0] += C_density + adapt.global_add(mcdc["eigenvalue_tally_C"], 0, C_density) # Maximum precursor density if mcdc["C_max"] < C_density: mcdc["C_max"] = C_density @@ -1514,20 +2331,20 @@ def eigenvalue_tally_closeout_history(mcdc): buff_IC_fission = np.zeros(1, np.float64) with objmode(): MPI.COMM_WORLD.Allreduce( - np.array([mcdc["eigenvalue_tally_nuSigmaF"]]), buff_nuSigmaF, MPI.SUM + np.array(mcdc["eigenvalue_tally_nuSigmaF"]), buff_nuSigmaF, MPI.SUM ) if mcdc["cycle_active"]: MPI.COMM_WORLD.Allreduce( - np.array([mcdc["eigenvalue_tally_n"]]), buff_n, MPI.SUM + np.array(mcdc["eigenvalue_tally_n"]), buff_n, MPI.SUM ) MPI.COMM_WORLD.Allreduce(np.array([mcdc["n_max"]]), buff_nmax, MPI.MAX) MPI.COMM_WORLD.Allreduce( - np.array([mcdc["eigenvalue_tally_C"]]), buff_C, MPI.SUM + np.array(mcdc["eigenvalue_tally_C"]), buff_C, MPI.SUM ) MPI.COMM_WORLD.Allreduce(np.array([mcdc["C_max"]]), buff_Cmax, MPI.MAX) if mcdc["technique"]["IC_generator"]: MPI.COMM_WORLD.Allreduce( - np.array([mcdc["technique"]["IC_fission_score"]]), + np.array(mcdc["technique"]["IC_fission_score"]), buff_IC_fission, MPI.SUM, ) @@ -1567,10 +2384,10 @@ def eigenvalue_tally_closeout_history(mcdc): mcdc["technique"]["IC_fission"] += tally_IC_fission # Reset accumulators - mcdc["eigenvalue_tally_nuSigmaF"] = 0.0 - mcdc["eigenvalue_tally_n"] = 0.0 - mcdc["eigenvalue_tally_C"] = 0.0 - 
mcdc["technique"]["IC_fission_score"] = 0.0 + mcdc["eigenvalue_tally_nuSigmaF"][0] = 0.0 + mcdc["eigenvalue_tally_n"][0] = 0.0 + mcdc["eigenvalue_tally_C"][0] = 0.0 + mcdc["technique"]["IC_fission_score"][0] = 0.0 # ===================================================================== # Gyration radius @@ -1578,7 +2395,7 @@ def eigenvalue_tally_closeout_history(mcdc): if mcdc["setting"]["gyration_radius"]: # Center of mass - N_local = mcdc["bank_census"]["size"] + N_local = get_bank_size(mcdc["bank_census"]) total_local = np.zeros(4, np.float64) # [x,y,z,W] total = np.zeros(4, np.float64) for i in range(N_local): @@ -1670,11 +2487,16 @@ def move_to_event(P, mcdc): # Also set particle material and speed d_boundary, event = distance_to_boundary(P, mcdc) + # print(P["material_ID"]) # Distance to tally mesh d_mesh = INF if mcdc["cycle_active"]: d_mesh = distance_to_mesh(P, mcdc["tally"]["mesh"], mcdc) + d_domain = INF + if mcdc["cycle_active"] and mcdc["technique"]["domain_decomposition"]: + d_domain = distance_to_mesh(P, mcdc["technique"]["dd_mesh"], mcdc) + if mcdc["technique"]["iQMC"]: d_iqmc_mesh = distance_to_mesh(P, mcdc["technique"]["iqmc"]["mesh"], mcdc) if d_iqmc_mesh < d_mesh: @@ -1701,7 +2523,9 @@ def move_to_event(P, mcdc): # ========================================================================= # Find the minimum - distance = min(d_boundary, d_time_boundary, d_time_census, d_mesh, d_collision) + distance = min( + d_boundary, d_time_boundary, d_time_census, d_mesh, d_collision, d_domain + ) # Remove the boundary event if it is not the nearest if d_boundary > distance * PREC: event = 0 @@ -1713,6 +2537,8 @@ def move_to_event(P, mcdc): event += EVENT_CENSUS if d_mesh <= distance * PREC: event += EVENT_MESH + if d_domain <= distance * PREC: + event += EVENT_DOMAIN if d_collision == distance: event = EVENT_COLLISION @@ -1729,7 +2555,7 @@ def move_to_event(P, mcdc): track_particle(P, mcdc) iqmc_score_tallies(P, distance, mcdc) 
iqmc_continuous_weight_reduction(P, distance, mcdc) - if np.abs(P["w"]) <= mcdc["technique"]["iqmc"]["w_min"]: + if abs(P["w"]) <= mcdc["technique"]["iqmc"]["w_min"]: P["alive"] = False # Score tracklength tallies @@ -1772,7 +2598,8 @@ def distance_to_boundary(P, mcdc): event = 0 # Translation accumulator - trans = np.zeros(3) + trans_struct = adapt.local_translate() + trans = trans_struct["values"] # Top level cell cell = mcdc["cells"][P["cell_ID"]] @@ -1789,7 +2616,8 @@ def distance_to_boundary(P, mcdc): distance = d_surface event = EVENT_SURFACE P["surface_ID"] = surface_ID - P["translation"][:] = trans + for i in range(3): + P["translation"][i] = trans[i] if surface_move: event = EVENT_SURFACE_MOVE @@ -1800,7 +2628,8 @@ def distance_to_boundary(P, mcdc): lattice = mcdc["lattices"][cell["lattice_ID"]] # Get lattice center for translation) - trans -= cell["lattice_center"] + for i in range(3): + trans[i] -= cell["lattice_center"][i] # Distance to lattice d_lattice = distance_to_lattice(P, lattice, trans) @@ -1892,7 +2721,12 @@ def distance_to_mesh(P, mesh, mcdc): @njit -def surface_crossing(P, mcdc): +def surface_crossing(P, prog): + + mcdc = adapt.device(prog) + + trans_struct = adapt.local_translate() + trans = trans_struct["values"] trans = P["translation"] # Implement BC @@ -1916,7 +2750,8 @@ def surface_crossing(P, mcdc): if P["alive"] and not surface["reflective"]: cell = mcdc["cells"][P["cell_ID"]] if not cell_check(P, cell, trans, mcdc): - trans = np.zeros(3) + trans_struct = adapt.local_translate() + trans = trans_struct["values"] P["cell_ID"] = get_particle_cell(P, 0, trans, mcdc) # Sensitivity quantification for surface? 
@@ -1928,7 +2763,7 @@ def surface_crossing(P, mcdc): material_ID_new = get_particle_material(P, mcdc) if material_ID_old != material_ID_new: # Sample derivative source particles - sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc) + sensitivity_surface(P, surface, material_ID_old, material_ID_new, prog) # ============================================================================= @@ -1989,7 +2824,8 @@ def capture(P, mcdc): @njit -def scattering(P, mcdc): +def scattering(P, prog): + mcdc = adapt.device(prog) # Kill the current particle P["alive"] = False @@ -2030,7 +2866,7 @@ def scattering(P, mcdc): P["E"] = P_new["E"] P["w"] = P_new["w"] else: - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) @njit @@ -2248,7 +3084,9 @@ def scatter_direction(ux, uy, uz, mu0, azi): @njit -def fission(P, mcdc): +def fission(P, prog): + mcdc = adapt.device(prog) + # Kill the current particle P["alive"] = False @@ -2294,9 +3132,9 @@ def fission(P, mcdc): # Bank idx_census = mcdc["idx_census"] if P_new["t"] > mcdc["setting"]["census_time"][idx_census]: - add_particle(P_new, mcdc["bank_census"]) + adapt.add_census(P_new, prog) elif mcdc["setting"]["mode_eigenvalue"]: - add_particle(P_new, mcdc["bank_census"]) + adapt.add_census(P_new, prog) else: # Keep it if it is the last particle if n == N - 1: @@ -2309,7 +3147,7 @@ def fission(P, mcdc): P["E"] = P_new["E"] P["w"] = P_new["w"] else: - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) @njit @@ -2450,9 +3288,10 @@ def fission_CE(P, nuclide, P_new): J = 6 nu = get_nu(NU_FISSION, nuclide, E) nu_p = get_nu(NU_FISSION_PROMPT, nuclide, E) - nu_d = np.zeros(J) + nu_d_struct = adapt.local_j_array() + nu_d = nu_d_struct["values"] for j in range(J): - nu_d[j] = get_nu(NU_FISSION_DELAYED, nuclide, E, j) + nu_d[j] = get_nu_group(NU_FISSION_DELAYED, nuclide, E, j) # Delayed? 
prompt = True @@ -2515,7 +3354,9 @@ def fission_CE(P, nuclide, P_new): @njit -def branchless_collision(P, mcdc): +def branchless_collision(P, prog): + mcdc = adapt.device(prog) + material = mcdc["materials"][P["material_ID"]] # Adjust weight @@ -2539,7 +3380,7 @@ def branchless_collision(P, mcdc): idx_census = mcdc["idx_census"] if P["t"] > mcdc["setting"]["census_time"][idx_census]: P["alive"] = False - add_particle(split_particle(P), mcdc["bank_census"]) + adapt.add_active(split_particle(P), prog) elif P["t"] > mcdc["setting"]["time_boundary"]: P["alive"] = False @@ -2550,7 +3391,9 @@ def branchless_collision(P, mcdc): @njit -def weight_window(P, mcdc): +def weight_window(P, prog): + mcdc = adapt.device(prog) + # Get indices t, x, y, z, outside = mesh_get_index(P, mcdc["technique"]["ww_mesh"]) @@ -2574,13 +3417,13 @@ def weight_window(P, mcdc): # Splitting (keep the original particle) n_split = math.floor(p) for i in range(n_split - 1): - add_particle(split_particle(P), mcdc["bank_active"]) + adapt.add_active(split_particle(P), prog) # Russian roulette p -= n_split xi = rng(P) if xi <= p: - add_particle(split_particle(P), mcdc["bank_active"]) + adapt.add_active(split_particle(P), prog) # Below target elif p < 1.0 / width: @@ -2597,7 +3440,7 @@ def weight_window(P, mcdc): # ============================================================================== -@njit +@toggle("iQMC") def iqmc_continuous_weight_reduction(P, distance, mcdc): """ Continuous weight reduction technique based on particle track-length, for @@ -2624,7 +3467,7 @@ def iqmc_continuous_weight_reduction(P, distance, mcdc): P["w"] = P["iqmc"]["w"].sum() -@njit +@toggle("iQMC") def iqmc_preprocess(mcdc): # set bank source iqmc = mcdc["technique"]["iqmc"] @@ -2641,7 +3484,7 @@ def iqmc_preprocess(mcdc): iqmc_consolidate_sources(mcdc) -@njit +@toggle("iQMC") def iqmc_prepare_nuSigmaF(mcdc): iqmc = mcdc["technique"]["iqmc"] mesh = iqmc["mesh"] @@ -2663,7 +3506,7 @@ def iqmc_prepare_nuSigmaF(mcdc): ) -@njit 
+@toggle("iQMC") def iqmc_prepare_source(mcdc): """ Iterates trhough all spatial cells to calculate the iQMC source. The source @@ -2703,7 +3546,7 @@ def iqmc_prepare_source(mcdc): iqmc_update_source(mcdc) -@njit +@toggle("iQMC") def iqmc_prepare_particles(mcdc): """ Create N_particles assigning the position, direction, and group from the @@ -2736,7 +3579,7 @@ def iqmc_prepare_particles(mcdc): for n in range(N_work): # Create new particle - P_new = np.zeros(1, dtype=type_.particle_record)[0] + P_new = adapt.local_particle_record() # assign initial group, time, and rng_seed (not used) P_new["g"] = 0 P_new["t"] = 0 @@ -2758,10 +3601,10 @@ def iqmc_prepare_particles(mcdc): P_new["iqmc"]["w"] = q * dV * N_total / N_particle P_new["w"] = P_new["iqmc"]["w"].sum() # add to source bank - add_particle(P_new, mcdc["bank_source"]) + adapt.add_source(P_new, mcdc) -@njit +@toggle("iQMC") def iqmc_res(flux_new, flux_old): """ @@ -2786,7 +3629,7 @@ def iqmc_res(flux_new, flux_old): return (flux_new - flux_old) / flux_old -@njit +@toggle("iQMC") def iqmc_score_tallies(P, distance, mcdc): """ @@ -2863,62 +3706,8 @@ def iqmc_score_tallies(P, distance, mcdc): tilt = iqmc_linear_tilt(P["uz"], P["z"], dz, z_mid, dx, dy, w, distance, SigmaT) score_bin["tilt-z"][:, t, x, y, z] += iqmc_effective_source(tilt, mat_id, mcdc) - if score_list["tilt-xy"]: - tilt = iqmc_bilinear_tilt( - P["ux"], - P["x"], - dx, - x_mid, - P["uy"], - P["y"], - dy, - y_mid, - dt, - dz, - w, - distance, - SigmaT, - ) - score_bin["tilt-xy"][:, t, x, y, z] += iqmc_effective_source(tilt, mat_id, mcdc) - - if score_list["tilt-xz"]: - tilt = iqmc_bilinear_tilt( - P["ux"], - P["x"], - dx, - x_mid, - P["uz"], - P["z"], - dz, - z_mid, - dt, - dy, - w, - distance, - SigmaT, - ) - score_bin["tilt-xz"][:, t, x, y, z] += iqmc_effective_source(tilt, mat_id, mcdc) - - if score_list["tilt-yz"]: - tilt = iqmc_bilinear_tilt( - P["uy"], - P["y"], - dy, - y_mid, - P["uz"], - P["z"], - dz, - z_mid, - dt, - dx, - w, - distance, - 
SigmaT, - ) - score_bin["tilt-yz"][:, t, x, y, z] += iqmc_effective_source(tilt, mat_id, mcdc) - -@njit +@toggle("iQMC") def iqmc_cell_volume(x, y, z, mesh): """ Calculate the volume of the current spatial cell. @@ -2951,12 +3740,12 @@ def iqmc_cell_volume(x, y, z, mesh): return dV -@njit +@toggle("iQMC") def iqmc_sample_position(a, b, sample): return a + (b - a) * sample -@njit +@toggle("iQMC") def iqmc_sample_isotropic_direction(sample1, sample2): """ @@ -2991,7 +3780,7 @@ def iqmc_sample_isotropic_direction(sample1, sample2): return ux, uy, uz -@njit +@toggle("iQMC") def iqmc_sample_group(sample, G): """ Uniformly sample energy group using a random sample between [0,1]. @@ -3012,7 +3801,7 @@ def iqmc_sample_group(sample, G): return int(np.floor(sample * G)) -@njit +@toggle("iQMC") def iqmc_generate_material_idx(mcdc): """ This algorithm is meant to loop through every spatial cell of the @@ -3032,9 +3821,10 @@ def iqmc_generate_material_idx(mcdc): Nz = len(mesh["z"]) - 1 dx = dy = dz = 1 # variables for cell finding functions - trans = np.zeros((3,)) + trans_struct = adapt.local_translate() + trans = trans_struct["values"] # create particle to utilize cell finding functions - P_temp = np.zeros(1, dtype=type_.particle)[0] + P_temp = adapt.local_particle() # set default attributes P_temp["alive"] = True P_temp["material_ID"] = -1 @@ -3069,7 +3859,7 @@ def iqmc_generate_material_idx(mcdc): mcdc["technique"]["iqmc"]["material_idx"][t, i, j, k] = material_ID -@njit +@toggle("iQMC") def iqmc_reset_tallies(iqmc): score_bin = iqmc["score"] score_list = iqmc["score_list"] @@ -3080,7 +3870,7 @@ def iqmc_reset_tallies(iqmc): score_bin[name].fill(0.0) -@njit +@toggle("iQMC") def iqmc_distribute_tallies(iqmc): score_bin = iqmc["score"] score_list = iqmc["score_list"] @@ -3090,7 +3880,7 @@ def iqmc_distribute_tallies(iqmc): iqmc_score_reduce_bin(score_bin[name]) -@njit +@toggle("iQMC") def iqmc_score_reduce_bin(score): # MPI Reduce buff = np.zeros_like(score) @@ -3099,7 
+3889,7 @@ def iqmc_score_reduce_bin(score): score[:] = buff -@njit +@toggle("iQMC") def iqmc_update_source(mcdc): iqmc = mcdc["technique"]["iqmc"] keff = mcdc["k_eff"] @@ -3115,7 +3905,7 @@ def iqmc_update_source(mcdc): iqmc["source"] = scatter + (fission / keff) + fixed -@njit +@toggle("iQMC") def iqmc_tilt_source(t, x, y, z, P, Q, mcdc): iqmc = mcdc["technique"]["iqmc"] score_list = iqmc["score_list"] @@ -3136,18 +3926,9 @@ def iqmc_tilt_source(t, x, y, z, P, Q, mcdc): # linear z-component if score_list["tilt-z"]: Q += score_bin["tilt-z"][:, t, x, y, z] * (P["z"] - z_mid) - # bilinear xy - if score_list["tilt-xy"]: - Q += score_bin["tilt-xy"][:, t, x, y, z] * (P["x"] - x_mid) * (P["y"] - y_mid) - # bilinear xz - if score_list["tilt-xz"]: - Q += score_bin["tilt-xz"][:, t, x, y, z] * (P["x"] - x_mid) * (P["z"] - z_mid) - # bilinear yz - if score_list["tilt-yz"]: - Q += score_bin["tilt-yz"][:, t, x, y, z] * (P["y"] - y_mid) * (P["z"] - z_mid) -@njit +@toggle("iQMC") def iqmc_distribute_sources(mcdc): """ This function is meant to distribute iqmc_total_source to the relevant @@ -3172,33 +3953,15 @@ def iqmc_distribute_sources(mcdc): score_bin = iqmc["score"] Vsize = 0 - # effective sources - # in Davidsons method we need to separate scattering and fission - # in all other methods we can combine them into one - if mcdc["setting"]["mode_eigenvalue"] and iqmc["eigenmode_solver"] == "davidson": - # effective scattering - score_bin["effective-scattering"] = np.reshape( - total_source[Vsize : (Vsize + size)].copy(), shape - ) - Vsize += size - # effective fission - score_bin["effective-fission"] = np.reshape( - total_source[Vsize : (Vsize + size)].copy(), shape - ) - Vsize += size - else: - # effective source - iqmc["source"] = np.reshape(total_source[Vsize : (Vsize + size)].copy(), shape) - Vsize += size + # effective source + iqmc["source"] = np.reshape(total_source[Vsize : (Vsize + size)].copy(), shape) + Vsize += size # source tilting arrays tilt_list = [ "tilt-x", 
"tilt-y", "tilt-z", - "tilt-xy", - "tilt-xz", - "tilt-yz", ] for name in literal_unroll(tilt_list): if score_list[name]: @@ -3206,7 +3969,7 @@ def iqmc_distribute_sources(mcdc): Vsize += size -@njit +@toggle("iQMC") def iqmc_consolidate_sources(mcdc): """ This function is meant to collect the relevant invidual source @@ -3230,33 +3993,15 @@ def iqmc_consolidate_sources(mcdc): score_bin = iqmc["score"] Vsize = 0 - # effective sources - # in Davidsons method we need to separate scattering and fission - # in all other methods we can combine them into one - if mcdc["setting"]["mode_eigenvalue"] and iqmc["eigenmode_solver"] == "davidson": - # effective scattering array - total_source[Vsize : (Vsize + size)] = np.reshape( - score_bin["effective-scattering"].copy(), size - ) - Vsize += size - # effective fission array - total_source[Vsize : (Vsize + size)] = np.reshape( - score_bin["effective-fission"].copy(), size - ) - Vsize += size - else: - # effective source - total_source[Vsize : (Vsize + size)] = np.reshape(iqmc["source"].copy(), size) - Vsize += size + # effective source + total_source[Vsize : (Vsize + size)] = np.reshape(iqmc["source"].copy(), size) + Vsize += size # source tilting arrays tilt_list = [ "tilt-x", "tilt-y", "tilt-z", - "tilt-xy", - "tilt-xz", - "tilt-yz", ] for name in literal_unroll(tilt_list): if score_list[name]: @@ -3270,7 +4015,7 @@ def iqmc_consolidate_sources(mcdc): # TODO: Not all ST tallies have been built for case where SigmaT = 0.0 -@njit +@toggle("iQMC") def iqmc_flux(SigmaT, w, distance, dV): # Score Flux if SigmaT.all() > 0.0: @@ -3279,20 +4024,20 @@ def iqmc_flux(SigmaT, w, distance, dV): return distance * w / dV -@njit +@toggle("iQMC") def iqmc_fission_source(phi, material): SigmaF = material["fission"] nu_f = material["nu_f"] return np.sum(nu_f * SigmaF * phi) -@njit +@toggle("iQMC") def iqmc_fission_power(phi, material): SigmaF = material["fission"] return SigmaF * phi -@njit +@toggle("iQMC") def iqmc_effective_fission(phi, 
mat_id, mcdc): """ Calculate the fission source for use with iQMC. @@ -3327,7 +4072,7 @@ def iqmc_effective_fission(phi, mat_id, mcdc): return F -@njit +@toggle("iQMC") def iqmc_effective_scattering(phi, mat_id, mcdc): """ Calculate the scattering source for use with iQMC. @@ -3353,14 +4098,14 @@ def iqmc_effective_scattering(phi, mat_id, mcdc): return np.dot(chi_s.T, SigmaS * phi) -@njit +@toggle("iQMC") def iqmc_effective_source(phi, mat_id, mcdc): S = iqmc_effective_scattering(phi, mat_id, mcdc) F = iqmc_effective_fission(phi, mat_id, mcdc) return S + F -@njit +@toggle("iQMC") def iqmc_linear_tilt(mu, x, dx, x_mid, dy, dz, w, distance, SigmaT): if SigmaT.all() > 1e-12: a = mu * ( @@ -3373,34 +4118,7 @@ def iqmc_linear_tilt(mu, x, dx, x_mid, dy, dz, w, distance, SigmaT): return Q -@njit -def iqmc_bilinear_tilt(ux, x, dx, x_mid, uy, y, dy, y_mid, dt, dz, w, S, SigmaT): - # TODO: integral incase of SigmaT = 0 - Q = ( - (1 / SigmaT**3) - * w - * ( - (x - x_mid) * SigmaT * (uy + (y - y_mid) * SigmaT) - + ux * (2 * uy + (y - y_mid) * SigmaT) - + np.exp(-S * SigmaT) - * ( - -2 * ux * uy - + ((-x + x_mid) * uy + ux * (-y + y_mid - 2 * S * uy)) * SigmaT - - (x - x_mid + S * ux) * (y - y_mid + S * uy) * SigmaT**2 - ) - ) - ) - - Q *= 144 / (dt * dx**3 * dy**3 * dz) - return Q - - -# ============================================================================= -# iQMC Iterative Method Mapping Functions -# ============================================================================= - - -@njit +@toggle("iQMC") def AxV(V, b, mcdc): """ Linear operator to be used with GMRES. 
@@ -3412,7 +4130,8 @@ def AxV(V, b, mcdc): # distribute segments of V to appropriate sources iqmc_distribute_sources(mcdc) # reset bank size - mcdc["bank_source"]["size"] = 0 + set_bank_size(mcdc["bank_source"], 0) + # QMC Sweep iqmc_prepare_particles(mcdc) iqmc_reset_tallies(iqmc) @@ -3430,98 +4149,6 @@ def AxV(V, b, mcdc): return axv -@njit -def HxV(V, mcdc): - """ - Linear operator for Davidson method, - scattering + streaming terms -> (I-L^(-1)S)*phi - """ - iqmc = mcdc["technique"]["iqmc"] - # flux input is most recent iteration of eigenvector - v = V[:, -1] - iqmc["total_source"] = v.copy() - iqmc_distribute_sources(mcdc) - # reset bank size - mcdc["bank_source"]["size"] = 0 - - # QMC Sweep - # prepare_qmc_scattering_source(mcdc) - iqmc["source"] = iqmc["fixed_source"] + iqmc["score"]["effective-scattering"] - iqmc_prepare_particles(mcdc) - iqmc_reset_tallies(iqmc) - iqmc["sweep_counter"] += 1 - loop_source(0, mcdc) - # sum resultant flux on all processors - iqmc_distribute_tallies(iqmc) - iqmc_consolidate_sources(mcdc) - v_out = iqmc["total_source"].copy() - axv = v - v_out - - return axv - - -@njit -def FxV(V, mcdc): - """ - Linear operator for Davidson method, - fission term -> (L^(-1)F*phi) - """ - iqmc = mcdc["technique"]["iqmc"] - # flux input is most recent iteration of eigenvector - v = V[:, -1] - # reshape v and assign to iqmc_flux - iqmc["total_source"] = v.copy() - iqmc_distribute_sources(mcdc) - # reset bank size - mcdc["bank_source"]["size"] = 0 - - # QMC Sweep - iqmc["source"] = iqmc["fixed_source"] + iqmc["score"]["effective-fission"] - iqmc_prepare_particles(mcdc) - iqmc_reset_tallies(iqmc) - iqmc["sweep_counter"] += 1 - loop_source(0, mcdc) - - # sum resultant flux on all processors - iqmc_distribute_tallies(iqmc) - iqmc_consolidate_sources(mcdc) - v_out = iqmc["total_source"].copy() - - return v_out - - -@njit -def preconditioner(V, mcdc, num_sweeps=3): - """ - Linear operator approximation of (I-L^(-1)S)*phi - - In this case the 
preconditioner is a specified number of purely scattering - transport sweeps. - """ - iqmc = mcdc["technique"]["iqmc"] - # flux input is most recent iteration of eigenvector - iqmc["total_source"] = V.copy() - iqmc_distribute_sources(mcdc) - - for i in range(num_sweeps): - # reset bank size - mcdc["bank_source"]["size"] = 0 - # QMC Sweep - iqmc["source"] = iqmc["fixed_source"] + iqmc["score"]["effective-scattering"] - iqmc_prepare_particles(mcdc) - iqmc_reset_tallies(iqmc) - iqmc["sweep_counter"] += 1 - loop_source(0, mcdc) - # sum resultant flux on all processors - iqmc_distribute_tallies(iqmc) - - iqmc_consolidate_sources(mcdc) - v_out = iqmc["total_source"].copy() - v_out = V - v_out - - return v_out - - # ============================================================================= # Weight Roulette # ============================================================================= @@ -3545,7 +4172,10 @@ def weight_roulette(P, mcdc): @njit -def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): +def sensitivity_surface(P, surface, material_ID_old, material_ID_new, prog): + + mcdc = adapt.device(prog) + # Sample number of derivative sources xi = surface["dsm_Np"] if xi != 1.0: @@ -3555,7 +4185,7 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): # Terminate and put the current particle into the secondary bank P["alive"] = False - add_particle(copy_particle(P), mcdc["bank_active"]) + adapt.add_active(copy_record(P), prog) # Get sensitivity ID ID = surface["sensitivity_ID"] @@ -3676,7 +4306,7 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): P_new["z"] += nz * 2 * SHIFT # Put the current particle into the secondary bank - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) # Sample potential second-order sensitivity particles? 
if mcdc["technique"]["dsm_order"] < 2 or P["sensitivity_ID"] > 0: @@ -3713,9 +4343,16 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): # Sample term xi = rng(P_new) * p_total tot = 0.0 - for material_ID, sign in zip( - [material_ID_new, material_ID_old], [sign_new, sign_old] - ): + + for idx in range(2): + + if idx == 0: + material_ID = material_ID_old + sign = sign_old + else: + material_ID = material_ID_new + sign = sign_new + material = mcdc["materials"][material_ID] if material["sensitivity"]: N_nuclide = material["N_nuclide"] @@ -3742,7 +4379,7 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): # Delta source P_new["w"] = -w * sign P_new["sensitivity_ID"] = ID_source - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) source_obtained = True else: P_new["w"] = w * sign @@ -3754,7 +4391,7 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): P, nuclide, P_new, mcdc ) P_new["sensitivity_ID"] = ID_source - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) source_obtained = True else: tot += nusigmaF @@ -3764,7 +4401,7 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): P, nuclide, P_new, mcdc ) P_new["sensitivity_ID"] = ID_source - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) source_obtained = True if source_obtained: break @@ -3773,7 +4410,10 @@ def sensitivity_surface(P, surface, material_ID_old, material_ID_new, mcdc): @njit -def sensitivity_material(P, mcdc): +def sensitivity_material(P, prog): + + mcdc = adapt.device(prog) + # The incident particle is already terminated # Get material @@ -3863,7 +4503,7 @@ def sensitivity_material(P, mcdc): P_new["sensitivity_ID"] = ID # Put the current particle into the secondary bank - add_particle(P_new, mcdc["bank_active"]) + adapt.add_active(P_new, prog) # ============================================================================== @@ 
-3871,18 +4511,33 @@ def sensitivity_material(P, mcdc): # ============================================================================== -@njit +@toggle("particle_tracker") def track_particle(P, mcdc): - idx = mcdc["particle_track_N"] - mcdc["particle_track"][idx, 0] = mcdc["particle_track_history_ID"] - mcdc["particle_track"][idx, 1] = mcdc["particle_track_particle_ID"] + idx = adapt.global_add(mcdc["particle_track_N"], 0, 1) + mcdc["particle_track"][idx, 0] = P["track_hid"] + mcdc["particle_track"][idx, 1] = P["track_pid"] mcdc["particle_track"][idx, 2] = P["g"] + 1 mcdc["particle_track"][idx, 3] = P["t"] mcdc["particle_track"][idx, 4] = P["x"] mcdc["particle_track"][idx, 5] = P["y"] mcdc["particle_track"][idx, 6] = P["z"] mcdc["particle_track"][idx, 7] = P["w"] - mcdc["particle_track_N"] += 1 + + +@toggle("particle_tracker") +def copy_track_data(P_new, P): + P_new["track_hid"] = P["track_hid"] + P_new["track_pid"] = P["track_pid"] + + +@toggle("particle_tracker") +def allocate_hid(P, mcdc): + P["track_hid"] = adapt.global_add(mcdc["particle_track_history_ID"], 0, 1) + + +@toggle("particle_tracker") +def allocate_pid(P, mcdc): + P["track_pid"] = adapt.global_add(mcdc["particle_track_particle_ID"], 0, 1) # ============================================================================== @@ -4019,7 +4674,7 @@ def get_microXS(type_, nuclide, E): @njit def get_XS(data, E, E_grid, NE): # Search XS energy bin index - idx = binary_search(E, E_grid, NE) + idx = binary_search_length(E, E_grid, NE) # Extrapolate if E is outside the given data if idx == -1: @@ -4037,7 +4692,7 @@ def get_XS(data, E, E_grid, NE): @njit -def get_nu(type_, nuclide, E, group=-1): +def get_nu_group(type_, nuclide, E, group): if type_ == NU_FISSION: nu = get_XS(nuclide["ce_nu_p"], E, nuclide["E_nu_p"], nuclide["NE_nu_p"]) for i in range(6): @@ -4063,6 +4718,11 @@ def get_nu(type_, nuclide, E, group=-1): ) +@njit +def get_nu(type_, nuclide, E): + return get_nu_group(type_, nuclide, E, -1) + + @njit 
def sample_nuclide(material, P, type_, mcdc): xi = rng(P) * get_MacroXS(type_, material, P, mcdc) @@ -4084,7 +4744,7 @@ def sample_Eout(P_new, E_grid, NE, chi): xi = rng(P_new) # Determine bin index - idx = binary_search(xi, chi, NE) + idx = binary_search_length(xi, chi, NE) # Linear interpolation E1 = E_grid[idx] @@ -4100,7 +4760,7 @@ def sample_Eout(P_new, E_grid, NE, chi): @njit -def binary_search(val, grid, length=0): +def binary_search_length(val, grid, length): """ Binary search that returns the bin index of the value `val` given grid `grid`. @@ -4126,6 +4786,11 @@ def binary_search(val, grid, length=0): return int(right) +@njit +def binary_search(val, grid): + return binary_search_length(val, grid, 0) + + @njit def lartg(f, g): """ diff --git a/mcdc/loop.py b/mcdc/loop.py index 202edf34..f155ee8c 100644 --- a/mcdc/loop.py +++ b/mcdc/loop.py @@ -3,8 +3,12 @@ from numba import njit, objmode, jit from scipy.linalg import eig +from mpi4py import MPI + +import mcdc.adapt as adapt import mcdc.kernel as kernel import mcdc.type_ as type_ +import pathlib import mcdc.print_ as print_module @@ -16,8 +20,48 @@ print_progress_iqmc, print_iqmc_eigenvalue_progress, print_iqmc_eigenvalue_exit_code, + print_msg, ) +caching = True + + +def set_cache(setting): + caching = setting + + if setting == False: + print_msg(" Caching has been disabled") + # p.unlink() for p in pathlib.Path('.').rglob('*.py[co]') + # p.rmdir() for p in pathlib.Path('.').rglob('__pycache__') + + +# ============================================================================= +# Functions for GPU Interop +# ============================================================================= + +# The symbols declared below will be overwritten to reference external code that +# manages GPU execution (if GPU execution is supported and selected) +alloc_state, free_state = [None] * 2 + +src_alloc_program, src_free_program, src_load_state, src_store_state = [None] * 4 +src_init_program, src_exec_program, 
src_complete, src_clear_flags = [None] * 4 + +pre_alloc_program, pre_free_program, pre_load_state, pre_store_state = [None] * 4 +pre_init_program, pre_exec_program, pre_complete, pre_clear_flags = [None] * 4 + + +# If GPU execution is supported and selected, the functions shown below will +# be redefined to overwrite the above symbols and perform initialization/ +# finalization of GPU state +@njit +def setup_gpu(mcdc): + pass + + +@njit +def teardown_gpu(mcdc): + pass + # ========================================================================= # Fixed-source loop @@ -29,7 +73,7 @@ # see more about cacheing here https://numba.readthedocs.io/en/stable/developer/caching.html -@njit(cache=True) +@njit(cache=caching) def loop_fixed_source(mcdc): # Loop over batches for idx_batch in range(mcdc["setting"]["N_batch"]): @@ -54,7 +98,7 @@ def loop_fixed_source(mcdc): loop_source(seed_source, mcdc) # Loop over source precursors - if mcdc["bank_precursor"]["size"] > 0: + if kernel.get_bank_size(mcdc["bank_precursor"]) > 0: seed_source_precursor = kernel.split_seed( seed_census, SEED_SPLIT_SOURCE_PRECURSOR ) @@ -71,9 +115,9 @@ def loop_fixed_source(mcdc): # Multi-batch closeout if mcdc["setting"]["N_batch"] > 1: # Reset banks - mcdc["bank_source"]["size"] = 0 - mcdc["bank_census"]["size"] = 0 - mcdc["bank_active"]["size"] = 0 + kernel.set_bank_size(mcdc["bank_source"], 0) + kernel.set_bank_size(mcdc["bank_census"], 0) + kernel.set_bank_size(mcdc["bank_active"], 0) # Tally history closeout kernel.tally_reduce_bin(mcdc) @@ -94,7 +138,7 @@ def loop_fixed_source(mcdc): # ========================================================================= -@njit(cache=True) +@njit(cache=caching) def loop_eigenvalue(mcdc): # Loop over power iteration cycles for idx_cycle in range(mcdc["setting"]["N_cycle"]): @@ -133,81 +177,266 @@ def loop_eigenvalue(mcdc): # ============================================================================= -@njit(cache=True) +@njit(cache=caching) +def 
generate_source_particle(work_start, idx_work, seed, prog): + mcdc = adapt.device(prog) + + seed_work = kernel.split_seed(work_start + idx_work, seed) + + # ===================================================================== + # Get a source particle and put into active bank + # ===================================================================== + + # Get from fixed-source? + if kernel.get_bank_size(mcdc["bank_source"]) == 0: + # Sample source + P = kernel.source_particle(seed_work, mcdc) + + # Get from source bank + else: + P = mcdc["bank_source"]["particles"][idx_work] + + # Particle tracker + if mcdc["setting"]["track_particle"]: + kernel.allocate_hid(P, mcdc) + + # Check if it is beyond current census index + idx_census = mcdc["idx_census"] + if P["t"] > mcdc["setting"]["census_time"][idx_census]: + if mcdc["technique"]["domain_decomposition"]: + if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 0: + P["w"] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] + if kernel.particle_in_domain(P, mcdc): + adapt.add_census(P, prog) + else: + adapt.add_census(P, prog) + else: + # Add the source particle into the active bank + if mcdc["technique"]["domain_decomposition"]: + if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 0: + P["w"] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] + if kernel.particle_in_domain(P, mcdc): + adapt.add_active(P, prog) + else: + adapt.add_active(P, prog) + + +@njit(cache=caching) +def prep_particle(P, prog): + mcdc = adapt.device(prog) + + # Apply weight window + if mcdc["technique"]["weight_window"]: + kernel.weight_window(P, prog) + + # Particle tracker + if mcdc["setting"]["track_particle"]: + kernel.allocate_pid(P, mcdc) + + +@njit(cache=caching) +def exhaust_active_bank(prog): + mcdc = adapt.device(prog) + P = adapt.local_particle() + # Loop until active bank is exhausted + while kernel.get_bank_size(mcdc["bank_active"]) > 0: + # Get particle from active bank + kernel.get_particle(P, mcdc["bank_active"], mcdc) 
+ + prep_particle(P, prog) + + # Particle loop + loop_particle(P, mcdc) + + +@njit(cache=caching) +def source_closeout(prog, idx_work, N_prog): + mcdc = adapt.device(prog) + + # Tally history closeout for one-batch fixed-source simulation + if not mcdc["setting"]["mode_eigenvalue"] and mcdc["setting"]["N_batch"] == 1: + kernel.tally_closeout_history(mcdc) + + # Tally history closeout for multi-batch uq simulation + if mcdc["technique"]["uq"]: + kernel.uq_tally_closeout_history(mcdc) + + # Progress printout + percent = (idx_work + 1.0) / mcdc["mpi_work_size"] + if mcdc["setting"]["progress_bar"] and int(percent * 100.0) > N_prog: + N_prog += 1 + with objmode(): + print_progress(percent, mcdc) + + +@njit(cache=caching) +def source_dd_resolution(prog): + mcdc = adapt.device(prog) + + kernel.dd_particle_send(mcdc) + terminated = False + max_work = 1 + kernel.dd_recv(mcdc) + if mcdc["domain_decomp"]["work_done"]: + terminated = True + + while not terminated: + if kernel.get_bank_size(mcdc["bank_active"]) > 0: + P = adapt.local_particle() + # Loop until active bank is exhausted + while kernel.get_bank_size(mcdc["bank_active"]) > 0: + + kernel.get_particle(P, mcdc["bank_active"], mcdc) + if not kernel.particle_in_domain(P, mcdc) and P["alive"] == True: + print(f"recieved particle not in domain") + + # Apply weight window + if mcdc["technique"]["weight_window"]: + kernel.weight_window(P, mcdc) + + # Particle tracker + if mcdc["setting"]["track_particle"]: + mcdc["particle_track_particle_ID"] += 1 + + # Particle loop + loop_particle(P, mcdc) + + # Tally history closeout for one-batch fixed-source simulation + if ( + not mcdc["setting"]["mode_eigenvalue"] + and mcdc["setting"]["N_batch"] == 1 + ): + kernel.tally_closeout_history(mcdc) + + # Send all domain particle banks + kernel.dd_particle_send(mcdc) + + kernel.dd_recv(mcdc) + + # Progress printout + """ + percent = 1 - work_remaining / max_work + if mcdc["setting"]["progress_bar"] and int(percent * 100.0) > N_prog: + 
N_prog += 1 + with objmode(): + print_progress(percent, mcdc) + """ + if kernel.dd_check_halt(mcdc): + kernel.dd_check_out(mcdc) + terminated = True + + +@njit def loop_source(seed, mcdc): # Progress bar indicator N_prog = 0 + if mcdc["technique"]["domain_decomposition"]: + kernel.dd_check_in(mcdc) + # Loop over particle sources work_start = mcdc["mpi_work_start"] work_size = mcdc["mpi_work_size"] work_end = work_start + work_size - for idx_work in range(work_size): - seed_work = kernel.split_seed(work_start + idx_work, seed) - # Particle tracker - if mcdc["setting"]["track_particle"]: - mcdc["particle_track_history_ID"] += 1 + for idx_work in range(work_size): # ===================================================================== - # Get a source particle and put into active bank + # Generate a source particle # ===================================================================== - # Get from fixed-source? - if mcdc["bank_source"]["size"] == 0: - # Sample source - P = kernel.source_particle(seed_work, mcdc) + generate_source_particle(work_start, idx_work, seed, mcdc) - # Get from source bank - else: - P = mcdc["bank_source"]["particles"][idx_work] + # ===================================================================== + # Run the source particle and its secondaries + # ===================================================================== - # Check if it is beyond current census index - idx_census = mcdc["idx_census"] - if P["t"] > mcdc["setting"]["census_time"][idx_census]: - kernel.add_particle(P, mcdc["bank_census"]) - else: - # Add the source particle into the active bank - kernel.add_particle(P, mcdc["bank_active"]) + exhaust_active_bank(mcdc) # ===================================================================== - # Run the source particle and its secondaries + # Closeout # ===================================================================== - # Loop until active bank is exhausted - while mcdc["bank_active"]["size"] > 0: - # Get particle from active 
bank - P = kernel.get_particle(mcdc["bank_active"], mcdc) + source_closeout(mcdc, idx_work, N_prog) - # Apply weight window - if mcdc["technique"]["weight_window"]: - kernel.weight_window(P, mcdc) + if mcdc["technique"]["domain_decomposition"]: + source_dd_resolution(mcdc) - # Particle tracker - if mcdc["setting"]["track_particle"]: - mcdc["particle_track_particle_ID"] += 1 - # Particle loop - loop_particle(P, mcdc) +def gpu_sources_spec(): - # ===================================================================== - # Closeout - # ===================================================================== + def make_work(prog: nb.uintp) -> nb.boolean: + mcdc = adapt.device(prog) - # Tally history closeout for one-batch fixed-source simulation - if not mcdc["setting"]["mode_eigenvalue"] and mcdc["setting"]["N_batch"] == 1: - kernel.tally_closeout_history(mcdc) + idx_work = adapt.global_add(mcdc["mpi_work_iter"], 0, 1) - # Tally history closeout for multi-batch uq simulation - if mcdc["technique"]["uq"]: - kernel.uq_tally_closeout_history(mcdc) + if idx_work >= mcdc["mpi_work_size"]: + return False - # Progress printout - percent = (idx_work + 1.0) / mcdc["mpi_work_size"] - if mcdc["setting"]["progress_bar"] and int(percent * 100.0) > N_prog: - N_prog += 1 - with objmode(): - print_progress(percent, mcdc) + generate_source_particle( + mcdc["mpi_work_start"], nb.uint64(idx_work), mcdc["source_seed"], prog + ) + return True + + def initialize(prog: nb.uintp): + pass + + def finalize(prog: nb.uintp): + pass + + base_fns = (initialize, finalize, make_work) + + def step(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + if P["fresh"]: + prep_particle(P, prog) + P["fresh"] = False + step_particle(P, prog) + if P["alive"]: + adapt.step_async(prog, P) + + async_fns = [step] + return adapt.harm.RuntimeSpec("mcdc_source", adapt.state_spec, base_fns, async_fns) + + +@njit +def gpu_loop_source(seed, mcdc): + # Progress bar indicator + N_prog = 0 + + # 
===================================================================== + # GPU Interop + # ===================================================================== + + # Number of blocks to launch and number of iterations to run + block_count = 240 + iter_count = 65536 + + mcdc["mpi_work_iter"][0] = 0 + mcdc["source_seed"] = seed + + # Store the global state to the GPU + src_store_state(mcdc["gpu_state"], mcdc) + + # Execute the program, and continue to do so until it is done + src_exec_program(mcdc["source_program"], block_count, iter_count) + while not src_complete(mcdc["source_program"]): + src_exec_program(mcdc["source_program"], block_count, iter_count) + + # Recover the original program state + src_load_state(mcdc, mcdc["gpu_state"]) + src_clear_flags(mcdc["source_program"]) + + kernel.set_bank_size(mcdc["bank_active"], 0) + + # ===================================================================== + # Closeout (Moved out of the typical particle loop) + # ===================================================================== + + source_closeout(mcdc, 1, 1) + + if mcdc["technique"]["domain_decomposition"]: + source_dd_resolution(mcdc) # ========================================================================= @@ -215,92 +444,110 @@ def loop_source(seed, mcdc): # ========================================================================= -@njit(cache=True) -def loop_particle(P, mcdc): +@njit(cache=caching) +def loop_particle(P, prog): + mcdc = adapt.device(prog) + # Particle tracker if mcdc["setting"]["track_particle"]: kernel.track_particle(P, mcdc) while P["alive"]: - # Find cell from root universe if unknown - if P["cell_ID"] == -1: - trans = np.zeros(3) - P["cell_ID"] = kernel.get_particle_cell(P, 0, trans, mcdc) - - # Determine and move to event - kernel.move_to_event(P, mcdc) - event = P["event"] - - # The & operator here is a bitwise and. - # It is used to determine if an event type is part of the particle event. 
- - # Collision events - if event & EVENT_COLLISION: - # Generate IC? - if mcdc["technique"]["IC_generator"] and mcdc["cycle_active"]: - kernel.bank_IC(P, mcdc) - - # Branchless collision? - if mcdc["technique"]["branchless_collision"]: - kernel.branchless_collision(P, mcdc) - - # Analog collision - else: - # Get collision type - kernel.collision(P, mcdc) - event = P["event"] - - # Perform collision - if event == EVENT_SCATTERING: - kernel.scattering(P, mcdc) - elif event == EVENT_FISSION: - kernel.fission(P, mcdc) - - # Sensitivity quantification for nuclide? - material = mcdc["materials"][P["material_ID"]] - if material["sensitivity"] and ( - P["sensitivity_ID"] == 0 - or mcdc["technique"]["dsm_order"] == 2 - and P["sensitivity_ID"] <= mcdc["setting"]["N_sensitivity"] - ): - kernel.sensitivity_material(P, mcdc) + step_particle(P, prog) - # Surface crossing - if event & EVENT_SURFACE: - kernel.surface_crossing(P, mcdc) + # Particle tracker + if mcdc["setting"]["track_particle"]: + kernel.track_particle(P, mcdc) - # Lattice or mesh crossing (skipped if surface crossing) - elif event & EVENT_LATTICE or event & EVENT_MESH: - kernel.shift_particle(P, SHIFT) - # Moving surface transition - if event & EVENT_SURFACE_MOVE: - P["t"] += SHIFT - P["cell_ID"] = -1 +@njit(cache=caching) +def step_particle(P, prog): + mcdc = adapt.device(prog) - # Census time crossing - if event & EVENT_CENSUS: - P["t"] += SHIFT - kernel.add_particle(kernel.copy_particle(P), mcdc["bank_census"]) - P["alive"] = False + # Find cell from root universe if unknown + if P["cell_ID"] == -1: + trans_struct = adapt.local_translate() + trans = trans_struct["values"] + P["cell_ID"] = kernel.get_particle_cell(P, 0, trans, mcdc) - # Time boundary crossing - if event & EVENT_TIME_BOUNDARY: - P["alive"] = False + # Determine and move to event + kernel.move_to_event(P, mcdc) + event = P["event"] - # Apply weight window - if P["alive"] and mcdc["technique"]["weight_window"]: - kernel.weight_window(P, mcdc) + # 
The & operator here is a bitwise and. + # It is used to determine if an event type is part of the particle event. - # Apply weight roulette - if P["alive"] and mcdc["technique"]["weight_roulette"]: - # check if weight has fallen below threshold - if abs(P["w"]) <= mcdc["technique"]["wr_threshold"]: - kernel.weight_roulette(P, mcdc) + # Collision events + if event & EVENT_COLLISION: + # Generate IC? + if mcdc["technique"]["IC_generator"] and mcdc["cycle_active"]: + kernel.bank_IC(P, prog) - # Particle tracker - if mcdc["setting"]["track_particle"]: - kernel.track_particle(P, mcdc) + # Branchless collision? + if mcdc["technique"]["branchless_collision"]: + kernel.branchless_collision(P, prog) + + # Analog collision + else: + # Get collision type + kernel.collision(P, mcdc) + event = P["event"] + + # Perform collision + if event == EVENT_SCATTERING: + kernel.scattering(P, prog) + elif event == EVENT_FISSION: + kernel.fission(P, prog) + + # Sensitivity quantification for nuclide? + material = mcdc["materials"][P["material_ID"]] + if material["sensitivity"] and ( + P["sensitivity_ID"] == 0 + or mcdc["technique"]["dsm_order"] == 2 + and P["sensitivity_ID"] <= mcdc["setting"]["N_sensitivity"] + ): + kernel.sensitivity_material(P, prog) + + # Surface crossing + if event & EVENT_SURFACE: + kernel.surface_crossing(P, prog) + if event & EVENT_DOMAIN: + if not ( + mcdc["surfaces"][P["surface_ID"]]["reflective"] + or mcdc["surfaces"][P["surface_ID"]]["vacuum"] + ): + kernel.domain_crossing(P, mcdc) + + # Lattice or mesh crossing (skipped if surface crossing) + elif event & EVENT_LATTICE or event & EVENT_MESH: + kernel.shift_particle(P, SHIFT) + if event & EVENT_DOMAIN: + kernel.domain_crossing(P, mcdc) + + # Moving surface transition + if event & EVENT_SURFACE_MOVE: + P["t"] += SHIFT + P["cell_ID"] = -1 + + # Census time crossing + if event & EVENT_CENSUS: + P["t"] += SHIFT + adapt.add_census(P, prog) + P["alive"] = False + + # Time boundary crossing + if event & 
EVENT_TIME_BOUNDARY: + P["alive"] = False + + # Apply weight window + if P["alive"] and mcdc["technique"]["weight_window"]: + kernel.weight_window(P, prog) + + # Apply weight roulette + if P["alive"] and mcdc["technique"]["weight_roulette"]: + # check if weight has fallen below threshold + if abs(P["w"]) <= mcdc["technique"]["wr_threshold"]: + kernel.weight_roulette(P, mcdc) # ============================================================================= @@ -308,14 +555,12 @@ def loop_particle(P, mcdc): # ============================================================================= -@njit(cache=True) +@njit(cache=caching) def loop_iqmc(mcdc): # function calls from specified solvers iqmc = mcdc["technique"]["iqmc"] kernel.iqmc_preprocess(mcdc) if mcdc["setting"]["mode_eigenvalue"]: - if iqmc["eigenmode_solver"] == "davidson": - davidson(mcdc) if iqmc["eigenmode_solver"] == "power_iteration": power_iteration(mcdc) else: @@ -325,7 +570,7 @@ def loop_iqmc(mcdc): gmres(mcdc) -@njit(cache=True) +@njit(cache=caching) def source_iteration(mcdc): simulation_end = False iqmc = mcdc["technique"]["iqmc"] @@ -333,7 +578,7 @@ def source_iteration(mcdc): while not simulation_end: # reset particle bank size - mcdc["bank_source"]["size"] = 0 + kernel.set_bank_size(mcdc["bank_source"], 0) # initialize particles with LDS kernel.iqmc_prepare_particles(mcdc) # reset tallies for next loop @@ -363,7 +608,7 @@ def source_iteration(mcdc): total_source_old = iqmc["total_source"].copy() -@njit(cache=True) +@njit(cache=caching) def gmres(mcdc): """ GMRES solver. @@ -518,7 +763,7 @@ def gmres(mcdc): return -@njit(cache=True) +@njit(cache=caching) def power_iteration(mcdc): simulation_end = False iqmc = mcdc["technique"]["iqmc"] @@ -564,117 +809,113 @@ def power_iteration(mcdc): print_iqmc_eigenvalue_exit_code(mcdc) -@njit(cache=True) -def davidson(mcdc): - """ - The generalized Davidson method is a Krylov subspace method for solving - the generalized eigenvalue problem. 
The algorithm here is based on the - outline in: +# ============================================================================= +# Precursor source loop +# ============================================================================= - Subramanian, C., et al. "The Davidson method as an alternative to - power iterations for criticality calculations." Annals of nuclear - energy 38.12 (2011): 2818-2823. - """ - # TODO: handle imaginary eigenvalues - iqmc = mcdc["technique"]["iqmc"] - # Davidson parameters - simulation_end = False - maxit = iqmc["maxitt"] - tol = iqmc["tol"] - # num_sweeps: number of preconditioner sweeps - num_sweeps = iqmc["preconditioner_sweeps"] - # m : restart parameter - m = iqmc["krylov_restart"] - k_old = mcdc["k_eff"] - # initial size of Krylov subspace - Vsize = 1 - # l : number of eigenvalues to solve for - l = 1 - # vector size - Nt = iqmc["total_source"].size - # allocate memory then use slice indexing in loop - V = np.zeros((Nt, m), dtype=np.float64) - HV = np.zeros((Nt, m), dtype=np.float64) - FV = np.zeros((Nt, m), dtype=np.float64) - - V0 = iqmc["total_source"].copy() - V0 = kernel.preconditioner(V0, mcdc, num_sweeps=5) - # orthonormalize initial guess - V0 = V0 / np.linalg.norm(V0) - V[:, 0] = V0 - - if m is None: - # unless specified there is no restart parameter - m = maxit + 1 - - # Davidson Routine - while not simulation_end: - # Calculate V*H*V (HxV is scattering linear operator function) - HV[:, Vsize - 1] = kernel.HxV(V[:, :Vsize], mcdc) - VHV = np.dot(cga(V[:, :Vsize].T), cga(HV[:, :Vsize])) - # Calculate V*F*V (FxV is fission linear operator function) - FV[:, Vsize - 1] = kernel.FxV(V[:, :Vsize], mcdc) - VFV = np.dot(cga(V[:, :Vsize].T), cga(FV[:, :Vsize])) - # solve for eigenvalues and vectors - with objmode(Lambda="complex128[:]", w="complex128[:,:]"): - Lambda, w = eig(VFV, b=VHV) - Lambda = np.array(Lambda, dtype=np.complex128) - w = np.array(w, dtype=np.complex128) - - assert Lambda.imag.all() == 0.0 - Lambda = 
Lambda.real - w = w.real - # get indices of eigenvalues from largest to smallest - idx = np.flip(Lambda.argsort()) - # sort eigenvalues from largest to smallest - Lambda = Lambda[idx] - # take the l largest eigenvalues - Lambda = Lambda[:l] - # sort corresponding eigenvector (oriented by column) - w = w[:, idx] - # take the l largest eigenvectors - w = w[:, :l] - # assign keff - mcdc["k_eff"] = Lambda[0] - # Ritz vector - u = np.dot(cga(V[:, :Vsize]), cga(w)) - # residual - res = kernel.FxV(u, mcdc) - Lambda * kernel.HxV(u, mcdc) - iqmc["res_outter"] = abs(mcdc["k_eff"] - k_old) / k_old - k_old = mcdc["k_eff"] - iqmc["itt_outter"] += 1 - with objmode(): - print_iqmc_eigenvalue_progress(mcdc) +@njit(cache=caching) +def generate_precursor_particle(DNP, particle_idx, seed_work, prog): + mcdc = adapt.device(prog) + + # Set groups + j = DNP["g"] + g = DNP["n_g"] + + # Create new particle + P_new = adapt.local_particle() + part_seed = kernel.split_seed(particle_idx, seed_work) + P_new["rng_seed"] = part_seed + P_new["alive"] = True + P_new["w"] = 1.0 + P_new["sensitivity_ID"] = 0 + + # Set position + P_new["x"] = DNP["x"] + P_new["y"] = DNP["y"] + P_new["z"] = DNP["z"] + + # Get material + trans_struct = adapt.local_translate() + trans = trans_struct["values"] + P_new["cell_ID"] = kernel.get_particle_cell(P_new, 0, trans, mcdc) + material_ID = kernel.get_particle_material(P_new, mcdc) + material = mcdc["materials"][material_ID] + G = material["G"] + + # Sample nuclide and get spectrum and decay constant + N_nuclide = material["N_nuclide"] + if N_nuclide == 1: + nuclide = mcdc["nuclides"][material["nuclide_IDs"][0]] + spectrum = nuclide["chi_d"][j] + decay = nuclide["decay"][j] + else: + SigmaF = material["fission"][g] # MG only + nu_d = material["nu_d"][g] + xi = kernel.rng(P_new) * nu_d[j] * SigmaF + tot = 0.0 + for i in range(N_nuclide): + nuclide = mcdc["nuclides"][material["nuclide_IDs"][i]] + density = material["nuclide_densities"][i] + tot += density * 
nuclide["nu_d"][g, j] * nuclide["fission"][g] + if xi < tot: + # Nuclide determined, now get the constant and spectruum + spectrum = nuclide["chi_d"][j] + decay = nuclide["decay"][j] + break - # check convergence criteria - if (iqmc["itt_outter"] >= maxit) or (iqmc["res_outter"] <= tol): - simulation_end = True - with objmode(): - print_iqmc_eigenvalue_exit_code(mcdc) - # return - else: - # Precondition for next iteration - t = kernel.preconditioner(res, mcdc, num_sweeps) - # check restart condition - if Vsize <= m - l: - # appends new orthogonalization to V - V[:, : Vsize + 1] = kernel.modified_gram_schmidt(V[:, :Vsize], t) - Vsize += 1 - else: - # "restarts" by appending to a new array - Vsize = l + 1 - V[:, :Vsize] = kernel.modified_gram_schmidt(u, t) - # TODO: normalize and save final scalar flux - iqmc["score"]["flux"] /= iqmc["score"]["flux"].sum() + # Sample emission time + P_new["t"] = -math.log(kernel.rng(P_new)) / decay + idx_census = mcdc["idx_census"] + if idx_census > 0: + P_new["t"] += mcdc["setting"]["census_time"][idx_census - 1] + # Accept if it is inside current census index + if P_new["t"] < mcdc["setting"]["census_time"][idx_census]: + # Reduce precursor weight + DNP["w"] -= 1.0 -# ============================================================================= -# Precursor source loop -# ============================================================================= + # Skip if it's beyond time boundary + if P_new["t"] > mcdc["setting"]["time_boundary"]: + return + + # Sample energy + xi = kernel.rng(P_new) + tot = 0.0 + for g_out in range(G): + tot += spectrum[g_out] + if tot > xi: + break + P_new["g"] = g_out + + # Sample direction + ( + P_new["ux"], + P_new["uy"], + P_new["uz"], + ) = kernel.sample_isotropic_direction(P_new) + # Push to active bank + adapt.add_active(P_new, prog) -@njit(cache=True) + +@njit(cache=caching) +def source_precursor_closeout(prog, idx_work, N_prog): + mcdc = adapt.device(prog) + + # Tally history closeout for 
fixed-source simulation + if not mcdc["setting"]["mode_eigenvalue"]: + kernel.tally_closeout_history(mcdc) + + # Progress printout + percent = (idx_work + 1.0) / mcdc["mpi_work_size_precursor"] + if mcdc["setting"]["progress_bar"] and int(percent * 100.0) > N_prog: + N_prog += 1 + with objmode(): + print_progress(percent, mcdc) + + +@njit def loop_source_precursor(seed, mcdc): # TODO: censussed neutrons seeding is still not reproducible @@ -698,10 +939,6 @@ def loop_source_precursor(seed, mcdc): # Get precursor DNP = mcdc["bank_precursor"]["precursors"][idx_work] - # Set groups - j = DNP["g"] - g = DNP["n_g"] - # Determine number of particles to be generated w = DNP["w"] N = math.floor(w) @@ -716,108 +953,184 @@ def loop_source_precursor(seed, mcdc): # ===================================================================== for particle_idx in range(N): - # Create new particle - P_new = np.zeros(1, dtype=type_.particle)[0] - part_seed = kernel.split_seed(particle_idx, seed_work) - P_new["rng_seed"] = part_seed - P_new["alive"] = True - P_new["w"] = 1.0 - P_new["sensitivity_ID"] = 0 - - # Set position - P_new["x"] = DNP["x"] - P_new["y"] = DNP["y"] - P_new["z"] = DNP["z"] - - # Get material - trans = np.zeros(3) - P_new["cell_ID"] = kernel.get_particle_cell(P_new, 0, trans, mcdc) - material_ID = kernel.get_particle_material(P_new, mcdc) - material = mcdc["materials"][material_ID] - G = material["G"] - - # Sample nuclide and get spectrum and decay constant - N_nuclide = material["N_nuclide"] - if N_nuclide == 1: - nuclide = mcdc["nuclides"][material["nuclide_IDs"][0]] - spectrum = nuclide["chi_d"][j] - decay = nuclide["decay"][j] - else: - SigmaF = material["fission"][g] # MG only - nu_d = material["nu_d"][g] - xi = kernel.rng(P_new) * nu_d[j] * SigmaF - tot = 0.0 - for i in range(N_nuclide): - nuclide = mcdc["nuclides"][material["nuclide_IDs"][i]] - density = material["nuclide_densities"][i] - tot += density * nuclide["nu_d"][g, j] * nuclide["fission"][g] - if xi < 
tot: - # Nuclide determined, now get the constant and spectruum - spectrum = nuclide["chi_d"][j] - decay = nuclide["decay"][j] - break - - # Sample emission time - P_new["t"] = -math.log(kernel.rng(P_new)) / decay - idx_census = mcdc["idx_census"] - if idx_census > 0: - P_new["t"] += mcdc["setting"]["census_time"][idx_census - 1] - - # Accept if it is inside current census index - if P_new["t"] < mcdc["setting"]["census_time"][idx_census]: - # Reduce precursor weight - DNP["w"] -= 1.0 - - # Skip if it's beyond time boundary - if P_new["t"] > mcdc["setting"]["time_boundary"]: - continue - - # Sample energy - xi = kernel.rng(P_new) - tot = 0.0 - for g_out in range(G): - tot += spectrum[g_out] - if tot > xi: - break - P_new["g"] = g_out - - # Sample direction - ( - P_new["ux"], - P_new["uy"], - P_new["uz"], - ) = kernel.sample_isotropic_direction(P_new) - - # Push to active bank - kernel.add_particle(kernel.copy_particle(P_new), mcdc["bank_active"]) - - # Loop until active bank is exhausted - while mcdc["bank_active"]["size"] > 0: - # Get particle from active bank - P = kernel.get_particle(mcdc["bank_active"], mcdc) - - # Apply weight window - if mcdc["technique"]["weight_window"]: - kernel.weight_window(P, mcdc) - - # Particle tracker - if mcdc["setting"]["track_particle"]: - mcdc["particle_track_particle_ID"] += 1 - - # Particle loop - loop_particle(P, mcdc) + + generate_precursor_particle(DNP, particle_idx, seed_work, mcdc) + + exhaust_active_bank(mcdc) # ===================================================================== # Closeout # ===================================================================== - # Tally history closeout for fixed-source simulation - if not mcdc["setting"]["mode_eigenvalue"]: - kernel.tally_closeout_history(mcdc) + source_precursor_closeout(mcdc, idx_work, N_prog) - # Progress printout - percent = (idx_work + 1.0) / mcdc["mpi_work_size_precursor"] - if mcdc["setting"]["progress_bar"] and int(percent * 100.0) > N_prog: - N_prog += 1 - 
with objmode(): - print_progress(percent, mcdc) + +def gpu_precursor_spec(): + + def make_work(prog: nb.uintp) -> nb.boolean: + mcdc = adapt.device(prog) + + idx_work = adapt.global_add(mcdc["mpi_work_iter"], 0, 1) + + if idx_work >= mcdc["mpi_work_size_precursor"]: + return False + + seed = mcdc["source_seed"] + + # Get precursor + DNP = mcdc["bank_precursor"]["precursors"][idx_work] + + # Determine number of particles to be generated + w = DNP["w"] + N = math.floor(w) + # "Roulette" the last particle + seed_work = kernel.split_seed(idx_work, seed) + if kernel.rng_from_seed(seed_work) < w - N: + N += 1 + DNP["w"] = N + + # ===================================================================== + # Loop over source particles from the source precursor + # ===================================================================== + + for particle_idx in range(N): + generate_precursor_particle(DNP, particle_idx, seed_work, prog) + + return True + + def initialize(prog: nb.uintp): + pass + + def finalize(prog: nb.uintp): + pass + + base_fns = (initialize, finalize, make_work) + + def step(prog: nb.uintp, P: adapt.particle_gpu): + mcdc = adapt.device(prog) + if P["fresh"]: + prep_particle(P, prog) + P["fresh"] = False + step_particle(P, prog) + if P["alive"]: + adapt.step_async(prog, P) + + async_fns = [step] + return adapt.harm.RuntimeSpec( + "mcdc_precursor", adapt.state_spec, base_fns, async_fns + ) + + +@njit +def gpu_loop_source_precursor(seed, mcdc): + # TODO: censussed neutrons seeding is still not reproducible + + # Progress bar indicator + N_prog = 0 + + # ========================================================================= + # Sync. 
RNG skip ahead for reproducibility + # ========================================================================= + + # Exscan upper estimate of number of particles generated locally + idx_start, N_local, N_global = kernel.bank_scanning_DNP( + mcdc["bank_precursor"], mcdc + ) + + # ===================================================================== + # GPU Interop + # ===================================================================== + + # Number of blocks to launch and number of iterations to run + block_count = 240 + iter_count = 65536 + + mcdc["mpi_work_iter"][0] = 0 + mcdc["source_seed"] = seed + + # Store the global state to the GPU + pre_store_state(mcdc["gpu_state"], mcdc) + + # Execute the program, and continue to do so until it is done + pre_exec_program(mcdc["source_program"], block_count, iter_count) + while not pre_complete(mcdc["source_program"]): + pre_exec_program(mcdc["source_program"], block_count, iter_count) + + # Recover the original program state + pre_load_state(mcdc, mcdc["gpu_state"]) + pre_clear_flags(mcdc["source_program"]) + + kernel.set_bank_size(mcdc["bank_active"], 0) + + # ===================================================================== + # Closeout (moved out of loop) + # ===================================================================== + + source_precursor_closeout(mcdc, 1, 1) + + +def build_gpu_progs(): + + src_spec = gpu_sources_spec() + pre_spec = gpu_precursor_spec() + + adapt.harm.RuntimeSpec.bind_and_load() + + src_fns = src_spec.async_functions() + pre_fns = pre_spec.async_functions() + + global alloc_state, free_state + alloc_state = src_fns["alloc_state"] + free_state = src_fns["free_state"] + + global src_alloc_program, src_free_program, src_load_state, src_store_state + global src_init_program, src_exec_program, src_complete, src_clear_flags + src_alloc_program = src_fns["alloc_program"] + src_free_program = src_fns["free_program"] + src_load_state = src_fns["load_state"] + src_store_state = 
src_fns["store_state"] + src_init_program = src_fns["init_program"] + src_exec_program = src_fns["exec_program"] + src_complete = src_fns["complete"] + src_clear_flags = src_fns["clear_flags"] + + global pre_alloc_program, pre_free_program, pre_load_state, pre_store_state + global pre_init_program, pre_exec_program, pre_complete, pre_clear_flags + pre_alloc_state = pre_fns["alloc_state"] + pre_free_state = pre_fns["free_state"] + pre_alloc_program = pre_fns["alloc_program"] + pre_free_program = pre_fns["free_program"] + pre_load_state = pre_fns["load_state"] + pre_store_state = pre_fns["store_state"] + pre_init_program = pre_fns["init_program"] + pre_exec_program = pre_fns["exec_program"] + pre_complete = pre_fns["complete"] + pre_clear_flags = pre_fns["clear_flags"] + + @njit + def real_setup_gpu(mcdc): + arena_size = 0x10000 + block_count = 240 + mcdc["gpu_state"] = adapt.cast_voidptr_to_uintp(alloc_state()) + mcdc["source_program"] = adapt.cast_voidptr_to_uintp( + src_alloc_program(mcdc["gpu_state"], arena_size) + ) + src_init_program(mcdc["source_program"], 4096) + mcdc["precursor_program"] = adapt.cast_voidptr_to_uintp( + pre_alloc_program(mcdc["gpu_state"], arena_size) + ) + pre_init_program(mcdc["precursor_program"], 4096) + + @njit + def real_teardown_gpu(mcdc): + src_free_program(adapt.cast_uintp_to_voidptr(mcdc["source_program"])) + pre_free_program(adapt.cast_uintp_to_voidptr(mcdc["precursor_program"])) + free_state(adapt.cast_uintp_to_voidptr(mcdc["gpu_state"])) + + global setup_gpu, teardown_gpu + setup_gpu = real_setup_gpu + teardown_gpu = real_teardown_gpu + + global loop_source, loop_source_precursor + loop_source = gpu_loop_source + loop_source_precursor = gpu_loop_source_precursor diff --git a/mcdc/main.py b/mcdc/main.py index cfe5eead..a734a1d6 100644 --- a/mcdc/main.py +++ b/mcdc/main.py @@ -11,6 +11,11 @@ choices=["python", "numba", "numba_debug"], default="python", ) + +parser.add_argument( + "--target", type=str, help="Target", 
choices=["cpu", "gpu"], default="cpu" +) + parser.add_argument("--N_particle", type=int, help="Number of particles") parser.add_argument("--output", type=str, help="Output file name") parser.add_argument("--progress_bar", default=True, action="store_true") @@ -23,11 +28,13 @@ print_runtime, print_header_eigenvalue, print_warning, + print_error, ) # Set mode # TODO: Will be inside run() once Python/Numba adapter is integrated mode = args.mode +target = args.target if mode == "python": nb.config.DISABLE_JIT = True elif mode == "numba": @@ -80,7 +87,14 @@ import mcdc.type_ as type_ from mcdc.constant import * -from mcdc.loop import loop_fixed_source, loop_eigenvalue, loop_iqmc +from mcdc.loop import ( + loop_fixed_source, + loop_eigenvalue, + loop_iqmc, + set_cache, + build_gpu_progs, +) +import mcdc.loop as loop from mcdc.print_ import print_banner, print_msg, print_runtime, print_header_eigenvalue # Get input_deck @@ -88,6 +102,8 @@ input_deck = mcdc_.input_deck +import mcdc.adapt as adapt + def run(): # Override input deck with command-line argument, if given @@ -110,6 +126,9 @@ def run(): # Print banner, hardware configuration, and header print_banner(mcdc) + + set_cache(mcdc["setting"]["caching"]) + print_msg(" Now running TNT...") if mcdc["setting"]["mode_eigenvalue"]: print_header_eigenvalue(mcdc) @@ -137,6 +156,158 @@ def run(): closeout(mcdc) +# ============================================================================= +# utilities for handling discrepancies between input and state types +# ============================================================================= + + +def copy_field(dst, src, name): + if "padding" in name: + return + if isinstance(dst[name], np.ndarray): + if isinstance(src[name], np.ndarray) and dst[name].shape != src[name].shape: + for dim in src[name].shape: + if dim == 0: + return + print( + f"Warning: Dimension mismatch between input deck and global state for field '{name}'." 
+ ) + print( + f"State dimension {dst[name].shape} does not match input dimension {src[name].shape}" + ) + elif isinstance(src[name], list) and dst[name].shape[0] != len(src[name]): + if len(src[name]) == 0: + return + print( + f"Warning: Dimension mismatch between input deck and global state for field '{name}'." + ) + print( + f"State dimension {dst[name].shape} does not match input dimension {len(src[name])}" + ) + dst[name] = src[name] + + +# ============================================================================= +# prepare domain decomposition +# ============================================================================= +def get_d_idx(i, j, k, ni, nj): + N = i + j * ni + k * ni * nj + return N + + +def get_indexes(N, nx, ny): + k = int(N / (nx * ny)) + j = int((N - nx * ny * k) / nx) + i = int(N - nx * ny * k - nx * j) + return i, j, k + + +def get_neighbors(N, w, nx, ny, nz): + i, j, k = get_indexes(N, nx, ny) + if i > 0: + xn = get_d_idx(i - 1, j, k, nx, ny) + else: + xn = None + if i < (nx - 1): + xp = get_d_idx(i + 1, j, k, nx, ny) + else: + xp = None + if j > 0: + yn = get_d_idx(i, j - 1, k, nx, ny) + else: + yn = None + if j < (ny - 1): + yp = get_d_idx(i, j + 1, k, nx, ny) + else: + yp = None + if k > 0: + zn = get_d_idx(i, j, k - 1, nx, ny) + else: + zn = None + if k < (nz - 1): + zp = get_d_idx(i, j, k + 1, nx, ny) + else: + zp = None + return xn, xp, yn, yp, zn, zp + + +def dd_prepare(): + work_ratio = input_deck.technique["dd_work_ratio"] + + d_Nx = input_deck.technique["dd_mesh"]["x"].size - 1 + d_Ny = input_deck.technique["dd_mesh"]["y"].size - 1 + d_Nz = input_deck.technique["dd_mesh"]["z"].size - 1 + + input_deck.setting["bank_active_buff"] = 1000 + if input_deck.technique["dd_exchange_rate"] == None: + input_deck.technique["dd_exchange_rate"] = 100 + + if work_ratio is None: + work_ratio = np.ones(d_Nx * d_Ny * d_Nz) + input_deck.technique["dd_work_ratio"] = work_ratio + + if ( + input_deck.technique["domain_decomposition"] + and 
np.sum(work_ratio) != MPI.COMM_WORLD.Get_size() + ): + print_msg( + "Domain work ratio not equal to number of processors, %i != %i " + % (np.sum(work_ratio), MPI.COMM_WORLD.Get_size()) + ) + exit() + + if input_deck.technique["domain_decomposition"]: + # Assigning domain index + i = 0 + rank_info = [] + for n in range(d_Nx * d_Ny * d_Nz): + ranks = [] + for r in range(int(work_ratio[n])): + ranks.append(i) + if MPI.COMM_WORLD.Get_rank() == i: + d_idx = n + i += 1 + rank_info.append(ranks) + input_deck.technique["dd_idx"] = d_idx + xn, xp, yn, yp, zn, zp = get_neighbors(d_idx, 0, d_Nx, d_Ny, d_Nz) + else: + input_deck.technique["dd_idx"] = 0 + input_deck.technique["dd_xp_neigh"] = [] + input_deck.technique["dd_xn_neigh"] = [] + input_deck.technique["dd_yp_neigh"] = [] + input_deck.technique["dd_yn_neigh"] = [] + input_deck.technique["dd_zp_neigh"] = [] + input_deck.technique["dd_zn_neigh"] = [] + return + + if xp is not None: + input_deck.technique["dd_xp_neigh"] = rank_info[xp] + else: + input_deck.technique["dd_xp_neigh"] = [] + if xn is not None: + input_deck.technique["dd_xn_neigh"] = rank_info[xn] + else: + input_deck.technique["dd_xn_neigh"] = [] + + if yp is not None: + input_deck.technique["dd_yp_neigh"] = rank_info[yp] + else: + input_deck.technique["dd_yp_neigh"] = [] + if yn is not None: + input_deck.technique["dd_yn_neigh"] = rank_info[yn] + else: + input_deck.technique["dd_yn_neigh"] = [] + + if zp is not None: + input_deck.technique["dd_zp_neigh"] = rank_info[zp] + else: + input_deck.technique["dd_zp_neigh"] = [] + if zn is not None: + input_deck.technique["dd_zn_neigh"] = rank_info[zn] + else: + input_deck.technique["dd_zn_neigh"] = [] + + def prepare(): """ Preparing the MC transport simulation: @@ -145,6 +316,8 @@ def prepare(): (3) Create and set up global variable container `mcdc` """ + dd_prepare() + # ========================================================================= # Adapt kernels # 
========================================================================= @@ -168,8 +341,15 @@ def prepare(): type_.make_type_setting(input_deck) type_.make_type_uq_tally(input_deck) type_.make_type_uq(input_deck) + type_.make_type_domain_decomp(input_deck) + type_.make_type_dd_turnstile_event(input_deck) type_.make_type_technique(input_deck) type_.make_type_global(input_deck) + kernel.adapt_rng(nb.config.DISABLE_JIT) + + type_.make_type_translate(input_deck) + type_.make_type_group_array(input_deck) + type_.make_type_j_array(input_deck) # ========================================================================= # Create the global variable container @@ -184,6 +364,26 @@ def prepare(): mode_CE = input_deck.setting["mode_CE"] mode_MG = input_deck.setting["mode_MG"] + # ========================================================================= + # Platform Targeting, Adapters, Toggles, etc + # ========================================================================= + + if target == "gpu": + if not adapt.HAS_HARMONIZE: + print_error( + "No module named 'harmonize' - GPU functionality not available. 
" + ) + adapt.gpu_forward_declare() + + adapt.set_toggle("iQMC", input_deck.technique["iQMC"]) + adapt.set_toggle("domain_decomp", input_deck.technique["domain_decomposition"]) + adapt.set_toggle("particle_tracker", mcdc["setting"]["track_particle"]) + adapt.eval_toggle() + adapt.target_for(target) + if target == "gpu": + build_gpu_progs() + adapt.nopython_mode(mode == "numba") + # ========================================================================= # Nuclides # ========================================================================= @@ -192,7 +392,7 @@ def prepare(): for i in range(N_nuclide): # General data for name in ["ID", "fissionable", "sensitivity", "sensitivity_ID", "dsm_Np"]: - mcdc["nuclides"][i][name] = input_deck.nuclides[i][name] + copy_field(mcdc["nuclides"][i], input_deck.nuclides[i], name) # MG data if mode_MG: @@ -213,7 +413,7 @@ def prepare(): "chi_p", "chi_d", ]: - mcdc["nuclides"][i][name] = input_deck.nuclides[i][name] + copy_field(mcdc["nuclides"][i], input_deck.nuclides[i], name) # CE data (load data from XS library) dir_name = os.getenv("MCDC_XSLIB") @@ -274,7 +474,7 @@ def prepare(): input_deck.materials[i][name] ) else: - mcdc["materials"][i][name] = input_deck.materials[i][name] + copy_field(mcdc["materials"][i], input_deck.materials[i], name) # ========================================================================= # Surfaces @@ -284,7 +484,7 @@ def prepare(): for i in range(N_surface): for name in type_.surface.names: if name not in ["J", "t"]: - mcdc["surfaces"][i][name] = input_deck.surfaces[i][name] + copy_field(mcdc["surfaces"][i], input_deck.surfaces[i], name) # Variables with possible different sizes for name in ["J", "t"]: @@ -299,7 +499,7 @@ def prepare(): for i in range(N_cell): for name in type_.cell.names: if name not in ["surface_IDs", "positive_flags"]: - mcdc["cells"][i][name] = input_deck.cells[i][name] + copy_field(mcdc["cells"][i], input_deck.cells[i], name) # Variables with possible different sizes for name in 
["surface_IDs", "positive_flags"]: @@ -346,7 +546,7 @@ def prepare(): N_source = len(input_deck.sources) for i in range(N_source): for name in type_.source.names: - mcdc["sources"][i][name] = input_deck.sources[i][name] + copy_field(mcdc["sources"][i], input_deck.sources[i], name) # Normalize source probabilities tot = 0.0 @@ -361,17 +561,17 @@ def prepare(): for name in type_.tally.names: if name not in ["score", "mesh"]: - mcdc["tally"][name] = input_deck.tally[name] + copy_field(mcdc["tally"], input_deck.tally, name) # Set mesh for name in type_.mesh_names: - mcdc["tally"]["mesh"][name] = input_deck.tally["mesh"][name] + copy_field(mcdc["tally"]["mesh"], input_deck.tally["mesh"], name) # ========================================================================= # Setting # ========================================================================= for name in type_.setting.names: - mcdc["setting"][name] = input_deck.setting[name] + copy_field(mcdc["setting"], input_deck.setting, name) # Check if time boundary is above the final tally mesh time grid if mcdc["setting"]["time_boundary"] > mcdc["tally"]["mesh"]["t"][-1]: @@ -396,13 +596,14 @@ def prepare(): "implicit_capture", "population_control", "weight_window", + "domain_decomposition", "weight_roulette", "iQMC", "IC_generator", "branchless_collision", "uq", ]: - mcdc["technique"][name] = input_deck.technique[name] + copy_field(mcdc["technique"], input_deck.technique, name) # ========================================================================= # Population control @@ -424,7 +625,7 @@ def prepare(): "IC_precursor_density", "IC_precursor_density_max", ]: - mcdc["technique"][name] = input_deck.technique[name] + copy_field(mcdc["technique"], input_deck.technique, name) # ========================================================================= # Weight window (WW) @@ -432,7 +633,7 @@ def prepare(): # WW mesh for name in type_.mesh_names[:-1]: - mcdc["technique"]["ww_mesh"][name] = 
input_deck.technique["ww_mesh"][name] + copy_field(mcdc["technique"]["ww_mesh"], input_deck.technique["ww_mesh"], name) # WW windows mcdc["technique"]["ww"] = input_deck.technique["ww"] @@ -447,6 +648,24 @@ def prepare(): # Survival probability mcdc["technique"]["wr_survive"] = input_deck.technique["wr_survive"] + # ========================================================================= + # Domain Decomposition + # ========================================================================= + + # Set domain mesh + if input_deck.technique["domain_decomposition"]: + for name in ["x", "y", "z", "t", "mu", "azi"]: + copy_field( + mcdc["technique"]["dd_mesh"], input_deck.technique["dd_mesh"], name + ) + # Set exchange rate + for name in ["dd_exchange_rate", "dd_repro"]: + copy_field(mcdc["technique"], input_deck.technique, name) + # Set domain index + copy_field(mcdc, input_deck.technique, "dd_idx") + for name in ["xp", "xn", "yp", "yn", "zp", "zn"]: + copy_field(mcdc["technique"], input_deck.technique, f"dd_{name}_neigh") + copy_field(mcdc["technique"], input_deck.technique, "dd_work_ratio") # ========================================================================= # Quasi Monte Carlo @@ -464,15 +683,13 @@ def prepare(): "score_list", "score", ]: - mcdc["technique"]["iqmc"][name] = input_deck.technique["iqmc"][name] + copy_field(mcdc["technique"]["iqmc"], input_deck.technique["iqmc"], name) if input_deck.technique["iQMC"]: # pass in mesh iqmc = mcdc["technique"]["iqmc"] - iqmc["mesh"]["x"] = input_deck.technique["iqmc"]["mesh"]["x"] - iqmc["mesh"]["y"] = input_deck.technique["iqmc"]["mesh"]["y"] - iqmc["mesh"]["z"] = input_deck.technique["iqmc"]["mesh"]["z"] - iqmc["mesh"]["t"] = input_deck.technique["iqmc"]["mesh"]["t"] + for name in ["x", "y", "z", "t"]: + copy_field(iqmc["mesh"], input_deck.technique["iqmc"]["mesh"], name) # pass in score list for name, value in input_deck.technique["iqmc"]["score_list"].items(): iqmc["score_list"][name] = value @@ -529,7 +746,7 @@ 
def prepare(): # Assumes that all tallies will also be uq tallies for name in type_.uq_tally.names: if name != "score": - mcdc["technique"]["uq_tally"][name] = input_deck.tally[name] + copy_field(mcdc["technique"]["uq_tally"], input_deck.tally, name) M = len(input_deck.uq_deltas["materials"]) for i in range(M): @@ -563,13 +780,19 @@ def prepare(): idn = input_deck.uq_deltas["nuclides"][i]["ID"] mcdc["technique"]["uq_"]["nuclides"][i]["info"]["ID"] = idn for name in type_.uq_nuc.names: - mcdc["technique"]["uq_"]["nuclides"][i]["mean"][name] = ( - input_deck.nuclides[idn][name] + copy_field( + mcdc["technique"]["uq_"]["nuclides"][i]["mean"], + input_deck.nuclides[idn], + name, ) for name in input_deck.uq_deltas["nuclides"][i]["flags"]: + if "padding" in name: + continue mcdc["technique"]["uq_"]["nuclides"][i]["flags"][name] = True - mcdc["technique"]["uq_"]["nuclides"][i]["delta"][name] = ( - input_deck.uq_deltas["nuclides"][i][name] + copy_field( + mcdc["technique"]["uq_"]["nuclides"][i]["delta"], + input_deck.uq_deltas["nuclides"][i], + name, ) flags = mcdc["technique"]["uq_"]["nuclides"][i]["flags"] if flags["capture"] or flags["scatter"] or flags["fission"]: @@ -587,7 +810,10 @@ def prepare(): mcdc["mpi_master"] = mcdc["mpi_rank"] == 0 # Distribute work to MPI ranks - kernel.distribute_work(mcdc["setting"]["N_particle"], mcdc) + if mcdc["technique"]["domain_decomposition"]: + kernel.distribute_work_dd(mcdc["setting"]["N_particle"], mcdc) + else: + kernel.distribute_work(mcdc["setting"]["N_particle"], mcdc) # ========================================================================= # Particle banks @@ -687,6 +913,8 @@ def prepare(): "w" ] + loop.setup_gpu(mcdc) + return mcdc @@ -813,15 +1041,6 @@ def generate_hdf5(mcdc): f.create_dataset( "iqmc/tally/source_z", data=T["iqmc"]["score"]["tilt-z"] ) - f.create_dataset( - "iqmc/tally/source_xy", data=T["iqmc"]["score"]["tilt-xy"] - ) - f.create_dataset( - "iqmc/tally/source_xz", data=T["iqmc"]["score"]["tilt-xz"] - ) 
- f.create_dataset( - "iqmc/tally/source_yz", data=T["iqmc"]["score"]["tilt-yz"] - ) # iteration data f.create_dataset("iqmc/itteration_count", data=T["iqmc"]["itt"]) f.create_dataset("iqmc/final_residual", data=T["iqmc"]["res"]) @@ -837,7 +1056,7 @@ def generate_hdf5(mcdc): # Particle tracker if mcdc["setting"]["track_particle"]: with h5py.File(mcdc["setting"]["output"] + "_ptrack.h5", "w") as f: - N_track = mcdc["particle_track_N"] + N_track = mcdc["particle_track_N"][0] f.create_dataset("tracks", data=mcdc["particle_track"][:N_track]) # IC generator @@ -877,6 +1096,9 @@ def generate_hdf5(mcdc): def closeout(mcdc): + + loop.teardown_gpu(mcdc) + # Runtime if mcdc["mpi_master"]: with h5py.File(mcdc["setting"]["output_name"] + ".h5", "a") as f: diff --git a/mcdc/type_.py b/mcdc/type_.py index db10b516..b6f7db6a 100644 --- a/mcdc/type_.py +++ b/mcdc/type_.py @@ -1,17 +1,34 @@ -import math, sys, os, h5py +import h5py +import math import numpy as np +import os from mpi4py import MPI +from mpi4py.util.dtlib import from_numpy_dtype +from mcdc.print_ import print_error + + +# ============================================================================== # Basic types +# ============================================================================== + float64 = np.float64 int64 = np.int64 int32 = np.int32 uint64 = np.uint64 +uint8 = np.uint8 bool_ = np.bool_ -str_ = "U30" +uintp = np.uintp +str_ = "U32" + + +# ============================================================================== +# MC/DC types +# ============================================================================== +# Currently defined based on input deck +# TODO: This causes JIT recompilation in certain cases -# MC/DC types, will be defined based on input deck particle = None particle_record = None nuclide = None @@ -24,9 +41,121 @@ setting = None tally = None technique = None + +# GPU mode related +translate = None +group_array = None +j_array = None global_ = None +# 
============================================================================== +# Alignment Logic +# ============================================================================== +# While CPU execution can robustly handle all sorts of Numba types, GPU +# execution requires structs to follow some of the basic properties expected of +# C-style structs with standard layout: +# +# - Every primitive field is aligned by its size, and padding is inserted +# between fields to ensure alignment in arrays and nested data structures +# +# - Every field has a unique address +# +# If these rules are violated, memory accesses made in GPUs may encounter +# problems. For example, in cases where an access is not at an address aligned +# by their size, a segfault or similar fault will occur, or information will be +# lost. These issues were fixed by providing a function, align, which ensures the +# field lists fed to np.dtype fulfill these requirements. +# +# The align function does the following: +# +# - Tracks the cumulative offset of fields as they appear in the input list. +# +# - Inserts additional padding fields to ensure that primitive fields are +# aligned by their size +# +# - Re-sizes arrays to have at least one element in their array (this ensure +# they have a non-zero size, and hence cannot overlap base addresses with +# other fields. +# + + +def fixup_dims(dim_tuple): + return tuple([max(d, 1) for d in dim_tuple]) + + +def align(field_list): + result = [] + offset = 0 + pad_id = 0 + for field in field_list: + if len(field) > 3: + print_error( + "Unexpected struct field specification. 
Specifications \ + usually only consist of 3 or fewer members" + ) + multiplier = 1 + if len(field) == 3: + field = (field[0], field[1], fixup_dims(field[2])) + for d in field[2]: + multiplier *= d + kind = np.dtype(field[1]) + size = kind.itemsize + + if kind.isbuiltin == 0: + alignment = 8 + elif kind.isbuiltin == 1: + alignment = size + else: + print_error("Unexpected field item type") + + size *= multiplier + + if offset % alignment != 0: + pad_size = alignment - (offset % alignment) + result.append((f"padding_{pad_id}", uint8, (pad_size,))) + pad_id += 1 + offset += pad_size + + result.append(field) + offset += size + + if offset % 8 != 0: + pad_size = 8 - (offset % 8) + result.append((f"padding_{pad_id}", uint8, (pad_size,))) + pad_id += 1 + + return result + + +def into_dtype(field_list): + result = np.dtype(align(field_list), align=True) + return result + + +# ============================================================================== +# Copy Logic +# ============================================================================== + + +type_roster = {} + + +def copy_fn_for(kind, name): + code = f"@njit\ndef copy_{name}(dst,src):\n" + for f_name, spec in kind.fields.items(): + f_dtype = spec[0] + if f_dtype in type_roster: + kind_name = type_roster[f_dtype]["name"] + code += f" copy_{kind_name}(dst['{f_name}'],src['{f_name}'])" + else: + code += f" dst['{f_name}'] = src['{f_name}']\n" + type_roster[kind] = {} + type_roster[kind]["name"] = name + exec(code) + return eval(f"copy_{name}") + + # ============================================================================== # Particle # ============================================================================== @@ -48,6 +177,7 @@ def make_type_particle(input_deck): ("E", float64), ("w", float64), ("alive", bool_), + ("fresh", bool_), ("material_ID", int64), ("cell_ID", int64), ("surface_ID", int64), @@ -74,12 +204,12 @@ def make_type_particle(input_deck): struct += [("iqmc", iqmc_struct)] # Save type - 
particle = np.dtype(struct) + particle = into_dtype(struct) # Particle record (in-bank) def make_type_particle_record(input_deck): - global particle_record + global particle_record, particle_record_mpi struct = [ ("x", float64), @@ -113,10 +243,13 @@ def make_type_particle_record(input_deck): struct += [("iqmc", iqmc_struct)] # Save type - particle_record = np.dtype(struct) + particle_record = into_dtype(struct) + + particle_record_mpi = from_numpy_dtype(particle_record) + particle_record_mpi.Commit() -precursor = np.dtype( +precursor = into_dtype( [ ("x", float64), ("y", float64), @@ -134,14 +267,18 @@ def make_type_particle_record(input_deck): def particle_bank(max_size): - return np.dtype( - [("particles", particle_record, (max_size,)), ("size", int64), ("tag", "U10")] + return into_dtype( + [ + ("particles", particle_record, (max_size,)), + ("size", int64, (1,)), + ("tag", str_), + ] ) def precursor_bank(max_size): - return np.dtype( - [("precursors", precursor, (max_size,)), ("size", int64), ("tag", "U10")] + return into_dtype( + [("precursors", precursor, (max_size,)), ("size", int64, (1,)), ("tag", str_)] ) @@ -275,7 +412,7 @@ def make_type_nuclide(input_deck): ] # Set the type - nuclide = np.dtype(struct) + nuclide = into_dtype(struct) # ============================================================================== @@ -332,7 +469,7 @@ def make_type_material(input_deck): ] # Set the type - material = np.dtype(struct) + material = into_dtype(struct) # ============================================================================== @@ -348,7 +485,7 @@ def make_type_surface(input_deck): for surface in input_deck.surfaces: Nmax_slice = max(Nmax_slice, surface["N_slice"]) - surface = np.dtype( + surface = into_dtype( [ ("ID", int64), ("N_slice", int64), @@ -387,7 +524,7 @@ def make_type_cell(input_deck): # Maximum number of surfaces per cell Nmax_surface = max([cell["N_surface"] for cell in input_deck.cells]) - cell = np.dtype( + cell = into_dtype( [ ("ID", int64), 
("N_surface", int64), @@ -421,7 +558,7 @@ def make_type_universe(input_deck): card["N_cell"] = N_cell card["cell_IDs"] = np.arange(N_cell) - universe = np.dtype( + universe = into_dtype( [("ID", int64), ("N_cell", int64), ("cell_IDs", int64, (Nmax_cell,))] ) @@ -431,7 +568,7 @@ def make_type_universe(input_deck): # ============================================================================== -mesh_uniform = np.dtype( +mesh_uniform = into_dtype( [ ("x0", float64), ("dx", float64), @@ -458,7 +595,7 @@ def make_type_lattice(input_deck): Nmax_y = max(Nmax_y, card["mesh"]["Ny"]) Nmax_z = max(Nmax_z, card["mesh"]["Nz"]) - lattice = np.dtype( + lattice = into_dtype( [("mesh", mesh_uniform), ("universe_IDs", int64, (Nmax_x, Nmax_y, Nmax_z))] ) @@ -516,7 +653,7 @@ def make_type_source(input_deck): ("energy", float64, (2, Nmax_E)), ] - source = np.dtype(struct) + source = into_dtype(struct) # ============================================================================== @@ -554,7 +691,7 @@ def make_type_tally(input_deck): struct = [("tracklength", bool_)] def make_type_score(shape): - return np.dtype( + return into_dtype( [ ("bin", float64, shape), ("mean", float64, shape), @@ -590,11 +727,11 @@ def make_type_score(shape): if not card[name]: shape = (0,) * len(shape) scores_struct += [(name, make_type_score(shape))] - scores = np.dtype(scores_struct) + scores = into_dtype(scores_struct) struct += [("score", scores)] # Make tally structure - tally = np.dtype(struct) + tally = into_dtype(struct) # ============================================================================== @@ -617,7 +754,8 @@ def make_type_setting(deck): ("mode_CE", bool_), # Misc. 
("progress_bar", bool_), - ("output_name", "U30"), + ("caching", bool_), + ("output_name", str_), ("save_input_deck", bool_), ("track_particle", bool_), # Eigenvalue mode @@ -634,17 +772,17 @@ def make_type_setting(deck): ("census_time", float64, (card["N_census"],)), # Particle source file ("source_file", bool_), - ("source_file_name", "U30"), + ("source_file_name", str_), # Initial condition source file ("IC_file", bool_), - ("IC_file_name", "U30"), + ("IC_file_name", str_), ("N_precursor", uint64), # TODO: Move to technique ("N_sensitivity", uint64), ] # Finalize setting type - setting = np.dtype(struct) + setting = into_dtype(struct) # ============================================================================== @@ -658,9 +796,6 @@ def make_type_setting(deck): "tilt-x", "tilt-y", "tilt-z", - "tilt-xy", - "tilt-xz", - "tilt-yz", "fission-power", "fission-source", ) @@ -694,6 +829,7 @@ def make_type_technique(input_deck): ("iQMC", bool_), ("IC_generator", bool_), ("branchless_collision", bool_), + ("domain_decomposition", bool_), ("uq", bool_), ] @@ -703,6 +839,24 @@ def make_type_technique(input_deck): struct += [("pct", int64), ("pc_factor", float64)] + # ========================================================================= + # domain decomp + # ========================================================================= + # Mesh + mesh, Nx, Ny, Nz, Nt, Nmu, N_azi, Ng = make_type_mesh(card["dd_mesh"]) + struct += [("dd_mesh", mesh)] + struct += [("dd_idx", int64)] + struct += [("dd_sent", int64)] + struct += [("dd_work_ratio", int64, (len(card["dd_work_ratio"]),))] + struct += [("dd_exchange_rate", int64)] + struct += [("dd_repro", bool_)] + struct += [("dd_xp_neigh", int64, (len(card["dd_xp_neigh"]),))] + struct += [("dd_xn_neigh", int64, (len(card["dd_xn_neigh"]),))] + struct += [("dd_yp_neigh", int64, (len(card["dd_yp_neigh"]),))] + struct += [("dd_yn_neigh", int64, (len(card["dd_yn_neigh"]),))] + struct += [("dd_zp_neigh", int64, 
(len(card["dd_zp_neigh"]),))] + struct += [("dd_zn_neigh", int64, (len(card["dd_zn_neigh"]),))] + # ========================================================================= # Weight window # ========================================================================= @@ -765,9 +919,6 @@ def make_type_technique(input_deck): ["tilt-x", (Ng, Nt, Nx, Ny, Nz)], ["tilt-y", (Ng, Nt, Nx, Ny, Nz)], ["tilt-z", (Ng, Nt, Nx, Ny, Nz)], - ["tilt-xy", (Ng, Nt, Nx, Ny, Nz)], - ["tilt-xz", (Ng, Nt, Nx, Ny, Nz)], - ["tilt-yz", (Ng, Nt, Nx, Ny, Nz)], ["fission-power", (Ng, Nt, Nx, Ny, Nz)], # SigmaF*phi ["fission-source", (1,)], # nu*SigmaF*phi ] @@ -782,7 +933,7 @@ def make_type_technique(input_deck): for i in range(len(scores_shapes)): name = scores_shapes[i][0] score_list += [(name, bool_)] - score_list = np.dtype(score_list) + score_list = into_dtype(score_list) iqmc_list += [("score_list", score_list)] # Add scores to structure @@ -796,7 +947,7 @@ def make_type_technique(input_deck): # TODO: make outter effective fission size zero if not eigenmode # (causes problems with numba) scores_struct += [("effective-fission-outter", float64, (Ng, Nt, Nx, Ny, Nz))] - scores = np.dtype(scores_struct) + scores = into_dtype(scores_struct) iqmc_list += [("score", scores)] # Constants @@ -819,7 +970,7 @@ def make_type_technique(input_deck): ("w_min", float64), ] - struct += [("iqmc", iqmc_list)] + struct += [("iqmc", into_dtype(iqmc_list))] # ========================================================================= # IC generator @@ -856,7 +1007,7 @@ def make_type_technique(input_deck): ("IC_bank_precursor_local", bank_precursor_local), ("IC_bank_neutron", bank_neutron), ("IC_bank_precursor", bank_precursor), - ("IC_fission_score", float64), + ("IC_fission_score", float64, (1,)), ("IC_fission", float64), ] @@ -874,7 +1025,7 @@ def make_type_technique(input_deck): struct += [("uq_tally", uq_tally), ("uq_", uq)] # Finalize technique type - technique = np.dtype(struct) + technique = 
into_dtype(struct) # UQ @@ -882,7 +1033,7 @@ def make_type_uq_tally(input_deck): global uq_tally def make_type_uq_score(shape): - return np.dtype( + return into_dtype( [ ("batch_bin", float64, shape), ("batch_var", float64, shape), @@ -925,18 +1076,18 @@ def make_type_uq_score(shape): if not tally_card[name]: shape = (0,) * len(shape) scores_struct += [(name, make_type_uq_score(shape))] - scores = np.dtype(scores_struct) + scores = into_dtype(scores_struct) struct += [("score", scores)] # Make tally structure - uq_tally = np.dtype(struct) + uq_tally = into_dtype(struct) def make_type_uq(input_deck): global uq, uq_nuc, uq_mat # def make_type_parameter(shape): - # return np.dtype( + # return into_dtype( # [ # ("tag", str_), # nuclides, materials, surfaces, sources # ("ID", int64), @@ -961,7 +1112,7 @@ def make_type_parameter(G, J, decay=False): ("chi_p", float64, (G, G)), ] struct += [("decay", float64, (J,)), ("chi_d", float64, (J, G))] - return np.dtype(struct) + return into_dtype(struct) # Size numbers G = input_deck.materials[0]["G"] @@ -973,7 +1124,7 @@ def make_type_parameter(G, J, decay=False): uq_nuc = make_type_parameter(G, J, True) uq_mat = make_type_parameter(G, J) - flags = np.dtype( + flags = into_dtype( [ ("speed", bool_), ("decay", bool_), @@ -990,19 +1141,75 @@ def make_type_parameter(G, J, decay=False): ("chi_d", bool_), ] ) - info = np.dtype([("distribution", str_), ("ID", int64), ("rng_seed", uint64)]) + info = into_dtype([("distribution", str_), ("ID", int64), ("rng_seed", uint64)]) - container = np.dtype( + container = into_dtype( [("mean", uq_nuc), ("delta", uq_mat), ("flags", flags), ("info", info)] ) N_nuclide = len(uq_deck["nuclides"]) N_material = len(uq_deck["materials"]) - uq = np.dtype( + uq = into_dtype( [("nuclides", container, (N_nuclide,)), ("materials", container, (N_material,))] ) +def make_type_dd_turnstile_event(input_deck): + global dd_turnstile_event, dd_turnstile_event_mpi + dd_turnstile_event = into_dtype( + [ + ("busy_delta", 
int32), + ("send_delta", int32), + ] + ) + dd_turnstile_event_mpi = from_numpy_dtype(dd_turnstile_event) + dd_turnstile_event_mpi.Commit() + + +def make_type_domain_decomp(input_deck): + global domain_decomp + # Domain banks if needed + if input_deck.technique["domain_decomposition"]: + bank_domain_xp = particle_bank(input_deck.technique["dd_exchange_rate"]) + bank_domain_xn = particle_bank(input_deck.technique["dd_exchange_rate"]) + bank_domain_yp = particle_bank(input_deck.technique["dd_exchange_rate"]) + bank_domain_yn = particle_bank(input_deck.technique["dd_exchange_rate"]) + bank_domain_zp = particle_bank(input_deck.technique["dd_exchange_rate"]) + bank_domain_zn = particle_bank(input_deck.technique["dd_exchange_rate"]) + else: + bank_domain_xp = particle_bank(0) + bank_domain_xn = particle_bank(0) + bank_domain_yp = particle_bank(0) + bank_domain_yn = particle_bank(0) + bank_domain_zp = particle_bank(0) + bank_domain_zn = particle_bank(0) + + domain_decomp = into_dtype( + [ + # Info tracked in all ranks + ("bank_xp", bank_domain_xp), + ("bank_xn", bank_domain_xn), + ("bank_yp", bank_domain_yp), + ("bank_yn", bank_domain_yn), + ("bank_zp", bank_domain_zp), + ("bank_zn", bank_domain_zn), + ("send_count", int64), # Number of particles sent + ("recv_count", int64), # Number of particles recv'd + ("rank_busy", bool_), # True if the rank currently has particles to process + ( + "work_done", + int64, + ), # Whether or not there is any outstanding work across any ranks + # Info tracked in "leader" rank zero + ( + "send_total", + int64, + ), # The total number of particles sent but not yet recv'd + ("busy_total", int64), # The total number of busy ranks + ] + ) + + param_names = ["tag", "ID", "key", "mean", "delta", "distribution", "rng_seed"] @@ -1081,7 +1288,7 @@ def make_type_global(input_deck): bank_source = particle_bank(N_work) # GLobal type - global_ = np.dtype( + global_ = into_dtype( [ ("nuclides", nuclide, (N_nuclide,)), ("materials", material, 
(N_material,)), @@ -1093,10 +1300,15 @@ def make_type_global(input_deck): ("tally", tally), ("setting", setting), ("technique", technique), + ("domain_decomp", domain_decomp), ("bank_active", bank_active), ("bank_census", bank_census), ("bank_source", bank_source), ("bank_precursor", bank_precursor), + ("rng_seed_base", uint64), + ("rng_seed", uint64), + ("rng_stride", int64), + ("dd_idx", int64), ("k_eff", float64), ("k_cycle", float64, (N_cycle,)), ("k_avg", float64), @@ -1112,9 +1324,9 @@ def make_type_global(input_deck): ("gyration_radius", float64, (N_cycle,)), ("idx_cycle", int64), ("cycle_active", bool_), - ("eigenvalue_tally_nuSigmaF", float64), - ("eigenvalue_tally_n", float64), - ("eigenvalue_tally_C", float64), + ("eigenvalue_tally_nuSigmaF", float64, (1,)), + ("eigenvalue_tally_n", float64, (1,)), + ("eigenvalue_tally_C", float64, (1,)), ("idx_census", int64), ("idx_batch", int64), ("mpi_size", int64), @@ -1132,10 +1344,15 @@ def make_type_global(input_deck): ("runtime_output", float64), ("runtime_bank_management", float64), ("particle_track", float64, (N_track, 8)), - ("particle_track_N", int64), - ("particle_track_history_ID", int64), - ("particle_track_particle_ID", int64), + ("particle_track_N", int64, (1,)), + ("particle_track_history_ID", int64, (1,)), + ("particle_track_particle_ID", int64, (1,)), ("precursor_strength", float64), + ("mpi_work_iter", int64, (1,)), + ("gpu_state", uintp), + ("source_program", uintp), + ("precursor_program", uintp), + ("source_seed", uint64), ] ) @@ -1145,6 +1362,23 @@ def make_type_global(input_deck): # ============================================================================== +def make_type_translate(input_deck): + global translate + translate = into_dtype([("values", float64, (3,))]) + + +def make_type_group_array(input_deck): + global group_array + G = input_deck.materials[0]["G"] + group_array = into_dtype([("values", float64, (G,))]) + + +def make_type_j_array(input_deck): + global j_array + J = 
input_deck.materials[0]["J"] + j_array = into_dtype([("values", float64, (J,))]) + + def make_type_mesh(card): Nx = len(card["x"]) - 1 Ny = len(card["y"]) - 1 @@ -1154,7 +1388,7 @@ def make_type_mesh(card): N_azi = len(card["azi"]) - 1 Ng = len(card["g"]) - 1 return ( - np.dtype( + into_dtype( [ ("x", float64, (Nx + 1,)), ("y", float64, (Ny + 1,)), @@ -1183,7 +1417,7 @@ def make_type_mesh_(card): Nmu = len(card["mu"]) - 1 N_azi = len(card["azi"]) - 1 return ( - np.dtype( + into_dtype( [ ("x", float64, (Nx + 1,)), ("y", float64, (Ny + 1,)), diff --git a/mcdc/visualizer.py b/mcdc/visualizer.py index 3504ae30..56d505e9 100644 --- a/mcdc/visualizer.py +++ b/mcdc/visualizer.py @@ -9,7 +9,7 @@ import distinctipy # creates unlimited visually distinct colors for visualization except ImportError as e: - msg = "\n >> MC/DC visualization error: \n >> dependencies for visualization not installed \n >> install optional dependencies needed for visualization with \n >> (add para for mac)" + msg = "\n >> MC/DC visualization error: \n >> dependencies for visualization not installed \n >> install optional dependencies needed for visualization with \n >> (for mac: 'mcdc[viz]')" print_warning(msg) import tkinter as tk # Tkinter is used to create the window for the time slider and color key @@ -81,7 +81,7 @@ def create_cell_geometry(cell, current_time, surface_list, start_time, end_time) surface_list[surface_ID]["I"], ) - # planes have to be intersected to achive the wanted visualization in ngsolve + # planes have to be intersected to achieve the wanted visualization in ngsolve cell_shape_list.append([Plane(Pnt(point), Vec(vector)), "intersect"]) elif surface_list[surface_ID]["type"] == "plane-y": @@ -113,7 +113,7 @@ def create_cell_geometry(cell, current_time, surface_list, start_time, end_time) surface_list[surface_ID]["I"], ) - # planes have to be intersected to achive the wanted visualization in ngsolve + # planes have to be intersected to achieve the wanted visualization in 
ngsolve cell_shape_list.append( [Plane(Pnt(point), Vec(vector)).col([1, 0, 0]), "intersect"] ) @@ -147,7 +147,7 @@ def create_cell_geometry(cell, current_time, surface_list, start_time, end_time) surface_list[surface_ID]["I"], ) - # planes have to be intersected to achive the wanted visualization in ngsolve + # planes have to be intersected to achieve the wanted visualization in ngsolve cell_shape_list.append( [Plane(Pnt(point), Vec(vector)).col([1, 0, 0]), "intersect"] ) @@ -266,7 +266,7 @@ def draw_Geometry(current_time, start_time, end_time, material_colors): geo = CSGeometry() # create the ngsolve geometry object - # colors that should not be generated by distinctipy(starts with visually unappleaning colors, manually set colors added later) + # colors that should not be generated by distinctipy(starts with visually unappealing colors, manually set colors added later) # These colors are rgb values, more can be added by extending the list input_colors = [ (1, 1, 1), # white @@ -275,7 +275,7 @@ def draw_Geometry(current_time, start_time, end_time, material_colors): # list of materials that need colors to be generated (ie not water or the source) material_colors_to_generate = [] - # if the color of water and source are not set make them blue and green respectivly + # if the color of water and source are not set make them blue and green respectively # add manually specified colors to input colors. 
for cell in cell_list: cell_material_name = cell["material_name"] @@ -414,7 +414,7 @@ def visualize(start_time=0, end_time=0, tick_interval=1, material_colors={}): # must be inside this loop so it doesn't launch when the visualizer is imported import netgen.gui except ImportError as e: - msg = "\n >> MC/DC visualization error: \n >> dependencies for visualization not installed \n >> install optional dependencies needed for visualization with \n >> (add para for mac)" + msg = "\n >> MC/DC visualization error: \n >> dependencies for visualization not installed \n >> install optional dependencies needed for visualization with \n >> (for mac: 'mcdc[viz]')" print_warning(msg) color_key_dic = draw_Geometry( diff --git a/pyproject.toml b/pyproject.toml index f94f9096..dc865eb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,6 @@ authors = [ { name="Joanna Morgan", email="morgajoa@oregonstate.edu" }, { name="Kayla Clements", email="clemekay@oregonstate.edu" }, { name="Braxton Cuneo", email="bcuneo@seattleu.edu" }, - { name="Charles Goodman"}, { name="Caleb Shaw"}, { name="Rohan Pankaj"}, { name="Alexander Mote"}, diff --git a/test/regression/dd_slab_reed/answer.h5 b/test/regression/dd_slab_reed/answer.h5 new file mode 100644 index 00000000..8ea0bb48 Binary files /dev/null and b/test/regression/dd_slab_reed/answer.h5 differ diff --git a/test/regression/dd_slab_reed/input.py b/test/regression/dd_slab_reed/input.py new file mode 100644 index 00000000..167e102b --- /dev/null +++ b/test/regression/dd_slab_reed/input.py @@ -0,0 +1,51 @@ +import numpy as np + +import mcdc + +# ============================================================================= +# Set model +# ============================================================================= +# Three slab layers with different materials +# Based on William H. 
Reed, NSE (1971), 46:2, 309-314, DOI: 10.13182/NSE46-309 + +# Set materials +m1 = mcdc.material(capture=np.array([50.0])) +m2 = mcdc.material(capture=np.array([5.0])) +m3 = mcdc.material(capture=np.array([0.0])) # Vacuum +m4 = mcdc.material(capture=np.array([0.1]), scatter=np.array([[0.9]])) + +# Set surfaces +s1 = mcdc.surface("plane-z", z=0.0, bc="reflective") +s2 = mcdc.surface("plane-z", z=2.0) +s3 = mcdc.surface("plane-z", z=3.0) +s4 = mcdc.surface("plane-z", z=5.0) +s5 = mcdc.surface("plane-z", z=8.0, bc="vacuum") + +# Set cells +mcdc.cell([+s1, -s2], m1) +mcdc.cell([+s2, -s3], m2) +mcdc.cell([+s3, -s4], m3) +mcdc.cell([+s4, -s5], m4) + +# ============================================================================= +# Set source +# ============================================================================= + +# Isotropic source in the absorbing medium +mcdc.source(z=[0.0, 2.0], isotropic=True, prob=50.0) + +# Isotropic source in the first half of the outermost medium, +# with 1/100 strength +mcdc.source(z=[5.0, 6.0], isotropic=True, prob=0.5) + +# ============================================================================= +# Set tally, setting, and run mcdc +# ============================================================================= + +mcdc.tally(scores=["flux"], z=np.linspace(0.0, 8.0, 81)) + +# Setting +mcdc.setting(N_particle=5000) +mcdc.domain_decomposition(z=np.linspace(0.0, 8.0, 5)) +# Run +mcdc.run() diff --git a/test/regression/iqmc_cooper2/answer.h5 b/test/regression/iqmc_cooper2/answer.h5 index f9a79537..eecd5e4e 100644 Binary files a/test/regression/iqmc_cooper2/answer.h5 and b/test/regression/iqmc_cooper2/answer.h5 differ diff --git a/test/regression/iqmc_cooper2/input.py b/test/regression/iqmc_cooper2/input.py index be08b9cc..e2dec67e 100644 --- a/test/regression/iqmc_cooper2/input.py +++ b/test/regression/iqmc_cooper2/input.py @@ -55,7 +55,7 @@ maxitt=maxit, tol=tol, generator=generator, - score=["tilt-x", "tilt-y", "tilt-xy"], + 
score=["tilt-x", "tilt-y"], ) # ============================================================================= diff --git a/test/regression/iqmc_kornreich_davidson/answer.h5 b/test/regression/iqmc_kornreich_davidson/answer.h5 deleted file mode 100644 index 92a74a56..00000000 Binary files a/test/regression/iqmc_kornreich_davidson/answer.h5 and /dev/null differ diff --git a/test/regression/iqmc_kornreich_davidson/input.py b/test/regression/iqmc_kornreich_davidson/input.py deleted file mode 100644 index 497792bb..00000000 --- a/test/regression/iqmc_kornreich_davidson/input.py +++ /dev/null @@ -1,66 +0,0 @@ -import numpy as np -import h5py -import mcdc - - -# ========================================================================= -# Set model -# ========================================================================= -# Based on Kornreich, ANE 2004, 31, 1477-1494, -# DOI: 10.1016/j.anucene.2004.03.012 - -# Set materials -m1 = mcdc.material( - capture=np.array([0.0]), - scatter=np.array([[0.9]]), - fission=np.array([0.1]), - nu_p=np.array([6.0]), -) -m2 = mcdc.material( - capture=np.array([0.68]), - scatter=np.array([[0.2]]), - fission=np.array([0.12]), - nu_p=np.array([2.5]), -) - -# Set surfaces -s1 = mcdc.surface("plane-x", x=0.0, bc="vacuum") -s2 = mcdc.surface("plane-x", x=1.5) -s3 = mcdc.surface("plane-x", x=2.5, bc="vacuum") - -# Set cells -mcdc.cell([+s1, -s2], m1) -mcdc.cell([+s2, -s3], m2) - -# ========================================================================= -# iQMC Parameters -# ========================================================================= -N = 10 -maxit = 5 -tol = 1e-3 -x = np.arange(0.0, 2.6, 0.1) -Nx = len(x) - 1 -generator = "halton" -solver = "davidson" -fixed_source = np.zeros(Nx) -phi0 = np.ones((Nx)) - -# ========================================================================= -# Set tally, setting, and run mcdc -# ========================================================================= - -mcdc.iQMC( - x=x, - 
fixed_source=fixed_source, - phi0=phi0, - maxitt=maxit, - tol=tol, - generator=generator, - eigenmode_solver=solver, -) -# Setting -mcdc.setting(N_particle=N) -mcdc.eigenmode() - -# Run -mcdc.run() diff --git a/test/regression/iqmc_sood_davidson/answer.h5 b/test/regression/iqmc_sood_davidson/answer.h5 deleted file mode 100644 index 71ab0095..00000000 Binary files a/test/regression/iqmc_sood_davidson/answer.h5 and /dev/null differ diff --git a/test/regression/iqmc_sood_davidson/input.py b/test/regression/iqmc_sood_davidson/input.py deleted file mode 100644 index 381085c2..00000000 --- a/test/regression/iqmc_sood_davidson/input.py +++ /dev/null @@ -1,59 +0,0 @@ -import numpy as np -import h5py -import mcdc - - -# ========================================================================= -# Set model -# ========================================================================= -# Based on Sood, PNE, Volume 42, Issue 1, 2003, Pages 55-106 2003, -# "Analytical Benchmark Test Set For Criticality Code Verification" - -# 2G-U Slab data -m1 = mcdc.material( - capture=np.array([0.01344, 0.00384]), - scatter=np.array([[0.26304, 0.0720], [0.00000, 0.078240]]), - fission=np.array([0.06912, 0.06192]), - nu_p=np.array([2.5, 2.7]), - chi_p=np.array([[0.425, 0.425], [0.575, 0.575]]), -) - -# Set surfaces -s1 = mcdc.surface("plane-x", x=0.0, bc="vacuum") -s2 = mcdc.surface("plane-x", x=6.01275, bc="vacuum") - -# Set cells -mcdc.cell([+s1, -s2], m1) - -# ========================================================================= -# iQMC Parameters -# ========================================================================= -Nx = 5 -N = 10 -maxit = 5 -tol = 1e-3 -x = np.linspace(0.0, 6.01275, num=Nx + 1) -generator = "halton" -solver = "davidson" -fixed_source = np.zeros(Nx) -phi0 = np.ones((Nx)) - -# ========================================================================= -# Set tally, setting, and run mcdc -# ========================================================================= - 
-mcdc.iQMC( - x=x, - phi0=phi0, - fixed_source=fixed_source, - maxitt=maxit, - tol=tol, - generator=generator, - eigenmode_solver=solver, -) -# Setting -mcdc.setting(N_particle=N) -mcdc.eigenmode() - -# Run -mcdc.run() diff --git a/test/regression/run.py b/test/regression/run.py index eac007ba..3fa51ec7 100644 --- a/test/regression/run.py +++ b/test/regression/run.py @@ -5,6 +5,7 @@ # Option parser parser = argparse.ArgumentParser(description="MC/DC regression test") parser.add_argument("--mode", type=str, choices=["python", "numba"], default="python") +parser.add_argument("--target", type=str, choices=["cpu", "gpu"], default="cpu") parser.add_argument("--mpiexec", type=int, default=0) parser.add_argument("--srun", type=int, default=0) parser.add_argument("--name", type=str, default="ALL") @@ -13,6 +14,7 @@ # Parse mode = args.mode +target = args.target mpiexec = args.mpiexec srun = args.srun name = args.name @@ -32,8 +34,36 @@ if skip != "NONE": skips = [item for item in os.listdir() if fnmatch.fnmatch(item, skip)] for name in skips: + print(Fore.YELLOW + "Note: Skipping %s" % name + Style.RESET_ALL) names.remove(name) +# Skip cache if any +if "__pycache__" in names: + names.remove("__pycache__") + +# Skip domain decomp tests unless there are 4 MPI processes +temp = names.copy() +for name in names: + if name[:3] == "dd_" and not (mpiexec == 4 or srun == 4): + temp.remove(name) + print( + Fore.YELLOW + + "Note: Skipping %s (require 4 MPI ranks)" % name + + Style.RESET_ALL + ) +names = temp + +# Skip iqmc if GPU run +if target == "gpu": + temp = names.copy() + for name in names: + if "iqmc" in name: + temp.remove(name) + print( + Fore.YELLOW + "Note: Skipping %s (GPU target)" % name + Style.RESET_ALL + ) +names = temp + # Data for each test printouts = [] runtimes = [] @@ -44,10 +74,6 @@ # Run all tests for i, name in enumerate(names): - # Skip cache if any - if name == "__pycache__": - continue - print("\n[%i/%i] " % (i + 1, len(names)) + name) 
error_msgs.append([]) crashes.append(False) @@ -71,18 +97,18 @@ # Run the test problem (redirect the stdout) if mpiexec > 1: os.system( - "mpiexec -n %i python input.py --mode=%s --output=output --no-progress-bar > tmp 2>&1" - % (mpiexec, mode) + "mpiexec -n %i python input.py --mode=%s --target=%s --output=output --no-progress-bar > tmp 2>&1" + % (mpiexec, mode, target) ) elif srun > 1: os.system( - "srun -n %i python input.py --mode=%s --output=output --no-progress-bar > tmp 2>&1" - % (srun, mode) + "srun -n %i python input.py --mode=%s --target=%s --output=output --no-progress-bar > tmp 2>&1" + % (srun, mode, target) ) else: os.system( - "python input.py --mode=%s --output=output --no-progress-bar > tmp 2>&1" - % (mode) + "python input.py --mode=%s --target=%s --output=output --no-progress-bar > tmp 2>&1" + % (mode, target) ) with open("tmp") as f: printouts.append(f.read()) @@ -101,7 +127,7 @@ answer = h5py.File("answer.h5", "r") runtimes[-1] = output["runtime/total"][()] - print(" (%.2f seconds)" % runtimes[-1]) + print(" (%.2f seconds)" % runtimes[-1][0]) # Compare all scores for score in [key for key in output["tally"].keys() if key != "grid"]: diff --git a/test/regression/variance_deconv/input.py b/test/regression/variance_deconv/input.py index fa3aedc6..f6076948 100644 --- a/test/regression/variance_deconv/input.py +++ b/test/regression/variance_deconv/input.py @@ -33,28 +33,3 @@ mcdc.uq(material=m3, distribution="uniform", capture=np.array([0.5])) mcdc.run() - -# ========================================================================= -# Check output -# ========================================================================= - -output = h5py.File("output.h5", "r") -answer = h5py.File("answer.h5", "r") -for score in scores: - name = "tally/" + score + "/mean" - a = output[name][:] - b = answer[name][:] - assert np.isclose(a, b).all() - - name = "tally/" + score + "/sdev" - a = output[name][:] - b = answer[name][:] - assert np.isclose(a, b).all() - - name 
= "tally/" + score + "/uq_var" - a = output[name][:] - b = answer[name][:] - assert np.isclose(a, b).all() - -output.close() -answer.close() diff --git a/test/unit/test_kernel.py b/test/unit/test_kernel.py index 836e2519..07724969 100644 --- a/test/unit/test_kernel.py +++ b/test/unit/test_kernel.py @@ -2,7 +2,7 @@ import mcdc as MCDC from mcdc import type_ from mcdc.main import closeout -from mcdc.kernel import rng, AxV, FxV, HxV, preconditioner +from mcdc.kernel import rng, AxV import mcdc.global_ as mcdc_ input_deck = mcdc_.input_deck @@ -46,7 +46,7 @@ def iqmc_dummy_mcdc_variable(): x = np.arange(0.0, 2.6, 0.1) Nx = len(x) - 1 generator = "halton" - solver = "davidson" + solver = "power_iteration" fixed_source = np.zeros(Nx) phi0 = np.ones((Nx)) @@ -130,93 +130,5 @@ def test_AxV_linearity(): assert np.allclose(F1, F2, rtol=1e-10) -def test_FxV_linearity(): - """ - FxV is a linear operator used for Davidson in iQMC. - - Linear operators must satisfy conditions of additivity and multiplicity - defined as: - - Additivity: f(x+y) = f(x) + f(y) - - Multiplicity: f(cx) = cf(x) - - We can test both properties with: - - f(a*x + b*y) = a*f(x) + b*f(y) - """ - MCDC.reset_cards() - - mcdc = iqmc_dummy_mcdc_variable() - - size = mcdc["technique"]["iqmc"]["total_source"].size - np.random.seed(123456) - a = np.random.random() - b = np.random.random() - x = np.random.random((size, 1)) - y = np.random.random((size, 1)) - - F1 = FxV((a * x + b * y), mcdc) - F2 = a * FxV(x, mcdc) + b * FxV(y, mcdc) - assert np.allclose(F1, F2, rtol=1e-10) - - -def test_HxV_linearity(): - """ - HxV is a linear operator used for Davidson in iQMC. 
- - Linear operators must satisfy conditions of additivity and multiplicity - defined as: - - Additivity: f(x+y) = f(x) + f(y) - - Multiplicity: f(cx) = cf(x) - - We can test both properties with: - - f(a*x + b*y) = a*f(x) + b*f(y) - """ - MCDC.reset_cards() - - mcdc = iqmc_dummy_mcdc_variable() - - size = mcdc["technique"]["iqmc"]["total_source"].size - np.random.seed(123456) - a = np.random.random() - b = np.random.random() - x = np.random.random((size, 1)) - y = np.random.random((size, 1)) - - F1 = HxV((a * x + b * y), mcdc) - F2 = a * HxV(x, mcdc) + b * HxV(y, mcdc) - assert np.allclose(F1, F2, rtol=1e-10) - - -def test_preconditioner_linearity(): - """ - perconditioner is a linear operator used for Davidson in iQMC. - - Linear operators must satisfy conditions of additivity and multiplicity - defined as: - - Additivity: f(x+y) = f(x) + f(y) - - Multiplicity: f(cx) = cf(x) - - We can test both properties with: - - f(a*x + b*y) = a*f(x) + b*f(y) - """ - MCDC.reset_cards() - - mcdc = iqmc_dummy_mcdc_variable() - - size = mcdc["technique"]["iqmc"]["total_source"].size - np.random.seed(123456) - a = np.random.random() - b = np.random.random() - x = np.random.random((size,)) - y = np.random.random((size,)) - - F1 = preconditioner((a * x + b * y), mcdc) - F2 = a * preconditioner(x, mcdc) + b * preconditioner(y, mcdc) - assert np.allclose(F1, F2, rtol=1e-10) - - # if __name__ == "__main__": -# test_rn_basic() -# test_AxV_linearity() -# test_FxV_linearity() -# test_HxV_linearity() -# test_preconditioner_linearity() +# test_AxV_linearity()