diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index ee8d3033f2..0000000000 --- a/.appveyor.yml +++ /dev/null @@ -1,42 +0,0 @@ -version: '1.0.{build}' # This number doesn't matter. - -pull_requests: - do_not_increment_build_number: true - -platform: - - x64 - -clone_folder: C:\projects\theano - -environment: - BINSTAR_TOKEN: - secure: Z4ZN29hd1UKw4qUwSlpFk+58Ssa+DfIKSGhN3Wr5uOAsP3dCXrNDl5+ipVdzADFn - CONDA_LOC: "C:\\Miniconda-x64" - MKL_THREADING_LAYER: GNU - -install: - # This breaks conda-build because of git - - cmd: rmdir C:\cygwin /s /q - - cmd: call %CONDA_LOC%\Scripts\activate.bat - - cmd: set PYTHONUNBUFFERED=1 - - cmd: conda install -n root --yes conda conda-env conda-build anaconda-client - - cmd: conda config --append channels mila-udem - -build: off - -test_script: - - cmd: for /f "tokens=*" %%i in ('python -c "import versioneer; print(versioneer.get_version())"') do set THEANO_VERSION=%%i - - cmd: echo %THEANO_VERSION% - - cmd: conda build --py 2.7 conda - - cmd: conda build --py 3.5 conda - - cmd: conda build --py 3.6 conda - - cmd: mkdir pkgs - - cmd: xcopy "%CONDA_LOC%"\conda-bld\win-64\theano* pkgs\ /Y - - ps: | - if($env:appveyor_repo_tag -eq 'True') { - cmd /c "anaconda -t $env:BINSTAR_TOKEN upload --user=mila-udem pkgs/* 2>&1" - } - -artifacts: - - path: pkgs/* - name: "Conda Packages" diff --git a/.gitattributes b/.gitattributes index 68e10f7066..9af3999b55 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -theano/_version.py export-subst +aesara/_version.py export-subst diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 76438d46ff..b01842ab2c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,5 +1,3 @@ -If you have any questions, please ask the [theano-user mailing list](https://groups.google.com/forum/#!forum/theano-users) or [stackoverflow](http://stackoverflow.com/) (using the "theano" tag) first. 
- ## Description of your problem or feature request **Please provide a minimal, self-contained, and reproducible example.** @@ -17,8 +15,8 @@ If you have any questions, please ask the [theano-user mailing list](https://gro ## Versions and main components -* Theano version: -* Theano config (`python -c "import theano; print(theano.config)"`) +* Aesara version: +* Aesara config (`python -c "import aesara; print(aesara.config)"`) * Python version: * Operating system: -* How did you install Theano: (conda/pip) +* How did you install Aesara: (conda/pip) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 4ce7f1a4e1..f794220898 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -28,8 +28,8 @@ jobs: mkdir -p test-sdist cd test-sdist python -m venv venv-sdist - venv-sdist/bin/python -m pip install ../dist/Theano-PyMC-*.tar.gz - venv-sdist/bin/python -c "import theano;print(theano.__version__)" + venv-sdist/bin/python -m pip install ../dist/aesara-*.tar.gz + venv-sdist/bin/python -c "import aesara;print(aesara.__version__)" - uses: actions/upload-artifact@v2 with: name: artifact diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bebca54591..f47bf7b3fd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,7 @@ jobs: with: filters: | python: &python - - 'theano/**/*.py' + - 'aesara/**/*.py' - 'tests/**/*.py' - '*.py' src: @@ -120,18 +120,18 @@ jobs: if [[ "$PYTHON_VERSION" != "3.6" ]]; then mamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}=*_cpython" jax jaxlib; fi pip install -q -r requirements.txt mamba list && pip freeze - python -c 'import theano; print(theano.config.__str__(print_doc=False))' - python -c 'import theano; assert(theano.config.blas__ldflags != "")' + python -c 'import aesara; print(aesara.config.__str__(print_doc=False))' + python -c 'import aesara; assert(aesara.config.blas__ldflags != "")' env: PYTHON_VERSION: ${{ matrix.python-version }} - name: Run tests shell: bash -l {0} run: | - if [[ $FAST_COMPILE == "1" ]]; then export THEANO_FLAGS=$THEANO_FLAGS,mode=FAST_COMPILE; fi - if [[ $FLOAT32 == "1" ]]; then export THEANO_FLAGS=$THEANO_FLAGS,floatX=float32; fi - export THEANO_FLAGS=$THEANO_FLAGS,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe - python -m pytest -x -r A --verbose --runslow --cov=theano/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART + if [[ $FAST_COMPILE == "1" ]]; then export AESARA_FLAGS=$AESARA_FLAGS,mode=FAST_COMPILE; fi + if [[ $FLOAT32 == "1" ]]; then export AESARA_FLAGS=$AESARA_FLAGS,floatX=float32; fi + export AESARA_FLAGS=$AESARA_FLAGS,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe + python -m pytest -x -r A --verbose --runslow --cov=aesara/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART env: MATRIX_ID: ${{ steps.matrix-id.outputs.id }} MKL_THREADING_LAYER: GNU diff --git a/.gitignore b/.gitignore index f41f18d099..99e3306ab6 100644 --- a/.gitignore +++ b/.gitignore @@ -30,11 +30,11 @@ doc/indexes/typelist.txt html pdf setuptools-*.egg -theano/generated_version.py -theano/generated_version.py.out +aesara/generated_version.py +aesara/generated_version.py.out distribute-*.egg distribute-*.tar.gz -Theano.suo +Aesara.suo .ipynb_checkpoints .pydevproject .ropeproject @@ -44,5 +44,5 @@ core .mypy_cache/ /htmlcov/ -theano-venv/ +aesara-venv/ /notebooks/Sandbox* diff --git a/.mailmap b/.mailmap deleted file mode 
100644 index 3db8e4b4c0..0000000000 --- a/.mailmap +++ /dev/null @@ -1,263 +0,0 @@ -# Prevent git from showing duplicate names with commands like "git shortlog" -# # See the manpage of git-shortlog for details. -# # The syntax is: -# # Name that should be used Bad name -# # -# # You can skip Bad name if it is the same as the one that should be used, and is unique. -# # -# # This file is up-to-date if the command git log --format="%aN <%aE>" | sort -u -# # gives no duplicates. -# 5 Firstname Lastname -# 4 Laboratoire d'Informatique des Systemes Adaptatifs -# 25 projects@lgcm - -abalkin abalkin -abalkin abalkin -abalkin Alexander Belopolsky -abalkin Alexander Belopolsky -Adam Becker khaotik -Adam Becker khaotik -Adrian Seyboldt aseyboldt -Aleksandar Botev botev -Alex Lamb AlexLamb -Alex Lamb DeathMonster666 -Alexandre de Brebisson AdeB -Alexandre de Brebisson Alexandre de Brébisson -Anatoly Belikov Anatoly Belikov -Andre Holzner Andre Holzner -Andre Holzner andreh -Andre Holzner Andre Holzner -Andrei Costinescu Andrei Costinescu -Andrei Costinescu AndreiCostinescu -Anirudh Goyal AndroidCloud -Arjun Jain Arjun Jain -Arnaud Bergeron -Arnaud Bergeron - -Balázs Hidasi Balázs -Bart van Merrienboer Bart van Merriënboer -Bart van Merrienboer Bart -Benjamin Scellier Benjamin Scellier -Benjamin Scellier Benjamin Scellier -Benjamin Scellier Benjamin Scellier -Benjamin Scellier Benjamin Scellier -Benjamin Scellier Benjamin Scellier -Benjamin Scellier bscellier -Bogdan Budescu bbudescu -Brian Cheung briancheung -Caglar Caglar -Cesar Laurent César Laurent -Chienli Ma Chienli Ma(马千里) -Chienli Ma ChienliMa -Chiheb Trabelsi Chiheb Trabelsi -Chinnadhurai Sankar Chinnadhurai Sankar -Chinnadhurai Sankar Chinnadhurai Sankar -Chinnadhurai Sankar Chinnadhurai Sankar -Chinnadhurai Sankar Chinnadhurai Sankar -Chinnadhurai Sankar chinnadhurai -Claude Coulombe Claude Coulombe -David Warde-Farley David Warde-Farley -David Warde-Farley David Warde Farley -David Warde-Farley David Warde-Farley -Douglas Eck eckdoug@localhost -Douglas Eck eckdoug@waits.local -Dmitrii Serdiuk dima -Dmitrii Serdiuk dmitriy-serdyuk -Dmitrii Serdiuk serdyuk -Dumitru Erhan dumitru@deepnets.mtv.corp.google.com -Dumitru Erhan erhandum@bikat.iro.umontreal.ca -Dzmitry Bahdanau rizar -Eric Hunsberger hunse -Ethan Buchman ebuchman -Evelyn Mitchell evelynmitchell -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk Ahmed -Faruk Ahmed Faruk-Ahmed -Fei Wang fay -Francesco Visin Francesco -Francesco Visin fvisin -Francois Savard fsavard -Frederic Bastien Frederic Bastien -Frederic Bastien Frederic Bastien -Frederic Bastien Frederic Bastien -Frederic Bastien Frédéric Bastien -Frederic Bastien Nouiz -Frederic Bastien Nouiz -Frederic Bastien bastienf@bikat.iro.umontreal.ca -Frederic Bastien bastienf@ldapk3.scinet.utoronto.ca -Frederic Bastien nouiz -Frederic Bastien Frederic -Frederic Bastien Frédéric Bastien -Frederic Bastien theano-bot -Gennadiy Tupitsin genichyar -Ghislain Antony Vaillant Ghislain Antony Vaillant -Gokula Krishnan Gokul -Grégoire Mesnil Grégoire -Grégoire Mesnil Grégoire -Guillaume Alain Guillaume Alain -Guillaume Desjardins desjagui -Guillaume Desjardins desjagui@atchoum.iro.umontreal.ca -Guillaume Desjardins desjagui@opale.iro.umontreal.ca -Guillaume Desjardins gdesjardins -Guillaume Desjardins tutorial/debug_faq.txt -gw0 [http://gw.tnode.com/] gw0 [http://gw.tnode.com/] -Hani Almousli Hani -Hani 
Almousli HaniAlmousli -Huy Nguyen huyng -Ian Goodfellow Ian Goodfellow -Ian Goodfellow goodfeli -Ian Goodfellow Ian Goodfellow -Ian Goodfellow Ian Goodfellow -Iban Harlouchet Iban Harlouchet -Iulian Vlad Serban Iulian Vlad Serban -Iulian Vlad Serban Iulian Vlad Serban -Jakub Sygnowski Jakub Sygnowski -James Bergstra James Bergstra -James Bergstra bergstra@ip05.m -James Bergstra bergstra@mlp4.ais.sandbox -James Bergstra bergstra@tikuanyin -James Bergstra bergstrj@iro.umontreal.ca -James Bergstra bergstrj@lgcm -James Bergstra james@X40 -James Bergstra james@crane -James Bergstra james@mackie -James Bergstra james@x40.unstable -James Bergstra test_rng_mrg.py -Jan Schlüter f0k -Jeremiah Lowin Jeremiah Lowin -Jeremiah Lowin Jeremiah Lowin -Jeremie Tanguay Tanjay94 -Jeremie Tanguay Jeremie Tanguay -Jesse Livezey JesseLivezey -Jesse Livezey Jesse Livezey -João Victor Tozatti Risso João Victor Risso -João Victor Tozatti Risso João Victor Tozatti Risso -John Salvatier jsalvatier -John Salvatier john salvatier -John Schulman joschu -jojolalpin jojolalpin -Joseph Turian Joseph Turian -Joseph Turian turian@grenat.iro.umontreal.ca -Joseph Turian turian@lgcm -Joseph Turian turian@lsvm.iro.umontreal.ca -Joseph Turian turian@rubis.iro.umontreal.ca -Joseph Turian turianjo@is23.m -Josh Bleecher Snyder Josh Bleecher Snyder -Jörg Bornschein Joerg Bornschein -Karthik Karanth medakk -Kelvin Xu Kelvin Xu -Kelvin Xu kelvinxu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kelvin Xu Kelvin Xu -Kv Manohar kvmanohar22 -Kyung Hyun Cho Kyunghyun Cho -Kyung Hyun Cho Kyunghyun Cho -Li Yao Li Yao -Li Yao Li Yao -Li Yao yaoli -Li Yao Li -Liwei Cai cai-lw -Lucas Beyer lucasb-eyer -Ludwig Schmidt-Hackenberg Ludwig Schmidt-Hackenberg -Luke Metz = -Markus Roth Markus Roth -Mathieu Germain Mathieu Germain -Mehdi Mirza Mehdi Mirza -Mehdi Mirza memimo -Mohammed Affan affan -Mohammed Affan affanv14 -Mohammed Affan Ubuntu -Mohammad Pezeshki Mohammad Pezeshki -Moslem Kazemi Moslem Kazemi -Moslem Kazemi Mo -Nan Rosemary Ke nke001 -Nicolas Ballas Kcub -Nicolas Ballas ballasn -Nicolas Boulanger-Lewandowski boulanni -Nicolas Pinto Nicolas Pinto -Olivier Breuleux Olivier Breuleux -Olivier Breuleux olivier@olivier-desktop -Olivier Breuleux olivier@ordinateur-de-olivier.local -Olivier Delalleau Olivier Delalleau -Olivier Delalleau delallea -Olivier Delalleau delallea@valhalla.apstat.com -Orhan Firat orhanf -Pascal Lamblin lamblin -Pascal Lamblin lamblinp@lgcm -Philippe Hamel Philippe Hamel -Philippe Hamel Philippe Hamel -Pierre-Antoine Manzagol Pierre-Antoine Manzagol -Pierre Luc Carrier --global -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier carriepl -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier Pierre Luc Carrier -Pierre Luc Carrier pl -Pierre Luc Carrier carriepl -Ramana Subramanyam sentient07 -Ramana Subramanyam Ramana.S -Rami Al-Rfou' Rami Al-Rfou -Raul Chandias Ferrari chandiar -Razvan Pascanu Razvan Pascanu -Razvan Pascanu Razvan Pascanu -Razvan Pascanu Razvan Pascanu -Razvan Pascanu pascanur@simplet.iro.umontreal.ca -Razvan Pascanu rman@rpad -Reyhane Askari Reyhane Askari -Roy Xue Lijun Xue -Ruslana Makovetsky ruslanagit -Sander Dieleman benanne -Sebastian Berg seberg -Sebastien Jean sebastien -Sebastien Jean sebastien-j -Sebastien Jean sebastien-j -Simon 
Lefrancois slefrancois -Simon Lefrancois Simon Lefrancois -Simon Lefrancois Jenkins -Simon Lefrancois mila -Sina Honari SinaHonari -Sina Honari Sina Honari -Sina Honari Sina Honari -Søren Kaae Sønderby skaae -Steven Bocco notoraptor -Steven Bocco notoraptor -Steven Bocco Seton Steven Bocco -Steven Bocco Seton Steven Bocco -Steven Bocco Seton Steven Bocco -Steven Bocco Seton Steven Bocco -Steven Pigeon steven-pigeon -Thomas George Thomas George -Thomas Wiecki twiecki -Valentin Bisson onze -Xavier Bouthillier Xavier Bouthillier -Xavier Bouthillier Xavier Bouthillier/ -Xavier Glorot glorotxa -Xavier Glorot glorotxa@timide.iro.umontreal.ca -Vincent Dumoulin vdumoulin -Vincent Michalski Vincent Michalski -Vitaliy Kurlin yobibyte -Vivek Kulkarni Vivek Kulkarni -Wei Li kuantkid -Yann N. Dauphin Yann N. Dauphin -Yaroslav Ganin Yaroslav Ganin -Yikang Shen yikang -Yoshua Bengio bengioy@bengio-mac.local -Ziye Fan FanZiye(t13m) -Zhouhan LIN hantek -Zhouhan LIN Zhouhan LIN diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a69879580..9e810d6bf7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ exclude: | (?x)^( versioneer\.py| - theano/_version\.py| + aesara/_version\.py| doc/.*| bin/.* )$ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 21ea13e2ac..aa9f7c33f3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,30 +1,29 @@ -If you want to contribute to Theano, have a look at the instructions here: -http://deeplearning.net/software/theano/dev_start_guide.html +If you want to contribute to Aesara, have a look at the instructions here: +https://aesara.readthedocs.io/en/latest/dev_start_guide.html -## Migrating PRs from original Theano Repo -Theano-PyMC is actively merging new changes. If you have a pull request on the original respository and would like to move it here use the following commands in your local theano repo +## Migrating PRs from the original Theano Repo +Aesara is actively merging new changes. If you have a pull request on the original Theano repository and would like to move it here, use the following commands in your local Aesara repository: ``` -# Go to your Theano Repo +# Go to your Aesara repo cd /path/to/your/repo -# If you'd like to add theano-PyMC as a remote -git remote add pymc git@github.com:pymc-devs/Theano-PyMC.git +# If you'd like to add aesara as a remote +git remote add aesara git@github.com:pymc-devs/aesara.git -# Verify the changes. You should see the pymc-devs/Theano-PyMC.git +# Verify the changes. You should see the pymc-devs/aesara.git git remote -v # Checkout the branch of your request git checkout branch_name -# Push to Theano-PyMC -git push pymc branch_name +# Push to Aesara +git push aesara branch_name ``` -If you'd like to completely run this command instead +If you would like to make Aesara the new "main" upstream remote: ``` -# If you'd like to replace this repo as a remote -git remote set-url origin git@github.com:pymc-devs/Theano-PyMC.git +git remote set-url upstream git@github.com:pymc-devs/aesara.git ``` diff --git a/DESCRIPTION.txt b/DESCRIPTION.txt index 4641cfebcb..c9b31e2039 100644 --- a/DESCRIPTION.txt +++ b/DESCRIPTION.txt @@ -1,14 +1,10 @@ -Theano is a Python library that allows you to define, optimize, and efficiently evaluate mathematical expressions involving multi-dimensional arrays. It is built on top of NumPy_. Theano features: +Aesara is a Python library that allows you to define, optimize, and efficiently evaluate mathematical expressions involving multi-dimensional arrays.
It is built on top of NumPy_. Aesara features: - * **tight integration with NumPy:** a similar interface to NumPy's. numpy.ndarrays are also used internally in Theano-compiled functions. + * **tight integration with NumPy:** a similar interface to NumPy's. numpy.ndarrays are also used internally in Aesara-compiled functions. * **transparent use of a GPU:** perform data-intensive computations up to 140x faster than on a CPU (support for float32 only). - * **efficient symbolic differentiation:** Theano can compute derivatives for functions of one or many inputs. + * **efficient symbolic differentiation:** Aesara can compute derivatives for functions of one or many inputs. * **speed and stability optimizations:** avoid nasty bugs when computing expressions such as log(1 + exp(x)) for large values of x. * **dynamic C code generation:** evaluate expressions faster. * **extensive unit-testing and self-verification:** includes tools for detecting and diagnosing bugs and/or potential problems. -Theano has been powering large-scale computationally intensive scientific -research since 2007, but it is also approachable enough to be used in the -classroom (IFT6266 at the University of Montreal). - .. _NumPy: http://numpy.scipy.org/ diff --git a/EMAIL.txt b/EMAIL.txt deleted file mode 100644 index fac6541a57..0000000000 --- a/EMAIL.txt +++ /dev/null @@ -1,123 +0,0 @@ -=========================== - Announcing Theano 0.5 -=========================== - -## You can select and adapt one of the following templates. - -## Basic text for major version release: - -This is a release for a major version, with lots of new -features, bug fixes, and some interface changes (deprecated or -potentially misleading features were removed). - -Upgrading to Theano 0.5 is recommended for everyone, but you should first make -sure that your code does not raise deprecation warnings with Theano 0.4.1. -Otherwise, in one case the results can change. In other cases, the warnings are -turned into errors (see below for details). - -For those using the bleeding edge version in the -git repository, we encourage you to update to the `0.5` tag. - - -## Basic text for major version release candidate: - -This is a release candidate for a major version, with lots of new -features, bug fixes, and some interface changes (deprecated or -potentially misleading features were removed). - -The upgrade is recommended for developers who want to help test and -report bugs, or want to use new features now. If you have updated -to 0.5rc1, you are highly encouraged to update to 0.5rc2. - -For those using the bleeding edge version in the -git repository, we encourage you to update to the `0.5rc2` tag. - - -## Basic text for minor version release: - -TODO - - -## Basic text for minor version release candidate: - -TODO - -What's New ----------- - -[Include the content of NEWS.txt here] - - -Download and Install --------------------- - -You can download Theano from http://pypi.python.org/pypi/Theano - -Installation instructions are available at -http://deeplearning.net/software/theano/install.html - -Description ------------ - -Theano is a Python library that allows you to define, optimize, and -efficiently evaluate mathematical expressions involving -multi-dimensional arrays. It is built on top of NumPy. Theano -features: - - * tight integration with NumPy: a similar interface to NumPy's. - numpy.ndarrays are also used internally in Theano-compiled functions. - * transparent use of a GPU: perform data-intensive computations much faster than on a CPU. 
- * efficient symbolic differentiation: Theano can compute derivatives - for functions of one or many inputs. - * speed and stability optimizations: avoid nasty bugs when computing - expressions such as log(1+ exp(x)) for large values of x. - * dynamic C code generation: evaluate expressions faster. - * extensive unit-testing and self-verification: includes tools for - detecting and diagnosing bugs and/or potential problems. - -Theano has been powering large-scale computationally intensive -scientific research since 2007, but it is also approachable -enough to be used in the classroom (IFT6266 at the University of Montreal). - -Resources ---------- - -About Theano: - -http://deeplearning.net/software/theano/ - -Theano-related projects: - -http://github.com/Theano/Theano/wiki/Related-projects - -About NumPy: - -http://numpy.scipy.org/ - -About SciPy: - -http://www.scipy.org/ - -Machine Learning Tutorial with Theano on Deep Architectures: - -http://deeplearning.net/tutorial/ - -Acknowledgments ---------------- - - - -I would like to thank all contributors of Theano. For this particular -release, many people have helped, notably (in alphabetical order): -[Generate the list of commiters: git shortlog -s ...| cut -c8-] - -I would also like to thank users who submitted bug reports, notably: -[TODO] - -Also, thank you to all NumPy and Scipy developers as Theano builds on -their strengths. - -All questions/comments are always welcome on the Theano -mailing-lists ( http://deeplearning.net/software/theano/#community ) - - diff --git a/HISTORY.txt b/HISTORY.txt deleted file mode 100644 index d8eb3eaeae..0000000000 --- a/HISTORY.txt +++ /dev/null @@ -1,3623 +0,0 @@ - -.. _HISTORY: - -================= -Old Release Notes -================= - -Theano 1.0.4 (16th of January 2019) -===================================== - -This is a maintenance release of Theano, version ``1.0.4``, with no -new features, but some important bug fixes. - -We recommend that everybody update to this version. - -Highlights (since 1.0.3): - - - Theano is now compatible with NumPy 1.16. - -A total of 10 people contributed to this release since ``1.0.3``: - - - wonghang - - Frederic Bastien - - Arnaud Bergeron - - Duc Nguyen - - Andrew Nelson - - Björn Linse - - Luis Mario Domenzain - - Rebecca N. Palmer - - Luciano Paz - - Dan Foreman-Mackey - -Theano 1.0.3 (20th of September 2018) -===================================== - -This is a maintenance release of Theano, version ``1.0.3``, with no -new features, but some important bug fixes. - -We recommend that everybody update to this version. - -Highlights (since 1.0.2): - - - Theano is now compatible with Python 3.7 - - Broadcasting for sparse dot products works correctly - - Subtensor grads do not return int anymore - -A total of 5 people contributed to this release since ``1.0.2``: - - - Frederic Bastien - - Arnaud Bergeron - - Dmitry Mottl - - Adrian Seyboldt - - Thomas Wiecki - - -Theano 1.0.2 (23rd of May, 2018) -==================================== - -This is a maintenance release of Theano, version ``1.0.2``, with no -new features, but some important bug fixes. - -We recommend that everybody update to this version. - -Highlights (since 1.0.1): - - - Theano should work under PyPy now (this is experimental). - - Update for cuDNN 7.1 RNN API changes. - - Fix for a crash related to mixed dtypes with cuDNN convolutions. - - MAGMA should work in more cases without manual config. - - Handle reductions with non-default accumulator dtype better on the GPU. 
- - Improvements to the test suite so that it fails less often due to - random chance. - -A total of 6 people contributed to this release since ``1.0.1``: - - - Frederic Bastien - - Steven Bocco - - Jon Haygood - - Arnaud Bergeron - - Jordan Melendez - - Desiree Vogt-Lee - - Garming Sam - - Pascal Lamblin - - Vincent Dumoulin - - Glexin - - Simon Lefrancois - - -Theano 1.0.1 (6th of December, 2017) -==================================== - -This is a maintenance release of Theano, version ``1.0.1``, with no -new features, but some important bug fixes. - -We recommend that everybody update to this version. - -Highlights (since 1.0.0): - - - Fixed compilation and improved float16 support for topK on GPU - - - **NB**: topK support on GPU is experimental and may not work for - large input sizes on certain GPUs - - - Fixed cuDNN reductions when axes to reduce have size ``1`` - - Attempted to prevent re-initialization of the GPU in a child process - - Fixed support for temporary paths with spaces in Theano initialization - - Spell check pass on the documentation - -A total of 6 people contributed to this release since ``1.0.0``: - - - Frederic Bastien - - Steven Bocco - - Arnaud Bergeron - - Sam Johnson - - Edward Betts - - Simon Lefrancois - - -Theano 1.0.0 (15th of November, 2017) -===================================== - -This is a final release of Theano, version ``1.0.0``, with a lot of -new features, interface changes, improvements and bug fixes. - -We recommend that everybody update to this version. - -Highlights (since 0.9.0): - - Announcing that `MILA will stop developing Theano `_ - - conda packages now available and updated in our own conda channel ``mila-udem`` - To install it: ``conda install -c mila-udem theano pygpu`` - - Support NumPy ``1.13`` - - Support pygpu ``0.7`` - - Moved Python ``3.*`` minimum supported version from ``3.3`` to ``3.4`` - - Added conda recipe - - Replaced deprecated package ``nose-parameterized`` with up-to-date package ``parameterized`` for Theano requirements - - Theano now internally uses ``sha256`` instead of ``md5`` to work on systems that forbid ``md5`` for security reason - - Removed old GPU backend ``theano.sandbox.cuda``. New backend ``theano.gpuarray`` is now the official GPU backend - - Make sure MKL uses GNU OpenMP - - - **NB**: Matrix dot product (``gemm``) with ``mkl`` from conda - could return wrong results in some cases. We have reported the problem upstream - and we have a work around that raises an error with information about how to fix it. - - - Improved elemwise operations - - - Speed-up elemwise ops based on SciPy - - Fixed memory leaks related to elemwise ops on GPU - - - Scan improvements - - - Speed up Theano scan compilation and gradient computation - - Added meaningful message when missing inputs to scan - - - Speed up graph toposort algorithm - - Faster C compilation by massively using a new interface for op params - - Faster optimization step, with new optional destroy handler - - Documentation updated and more complete - - - Added documentation for RNNBlock - - Updated ``conv`` documentation - - - Support more debuggers for ``PdbBreakpoint`` - - Many bug fixes, crash fixes and warning improvements - -A total of 71 people contributed to this release since 0.9.0, see list below. 
- -Interface changes: - - Merged duplicated diagonal functions into two ops: ``ExtractDiag`` (extract a diagonal to a vector), - and ``AllocDiag`` (set a vector as a diagonal of an empty array) - - Removed op ``ExtractDiag`` from ``theano.tensor.nlinalg``, now only in ``theano.tensor.basic`` - - Generalized ``AllocDiag`` for any non-scalar input - - Added new parameter ``target`` for MRG functions - - Renamed ``MultinomialWOReplacementFromUniform`` to ``ChoiceFromUniform`` - - Changed ``grad()`` method to ``L_op()`` in ops that need the outputs to compute gradient - - - Removed or deprecated Theano flags: - - - ``cublas.lib`` - - ``cuda.enabled`` - - ``enable_initial_driver_test`` - - ``gpuarray.sync`` - - ``home`` - - ``lib.cnmem`` - - ``nvcc.*`` flags - - ``pycuda.init`` - -Convolution updates: - - Implemented separable convolutions for 2D and 3D - - Implemented grouped convolutions for 2D and 3D - - Added dilated causal convolutions for 2D - - Added unshared convolutions - - Implemented fractional bilinear upsampling - - Removed old ``conv3d`` interface - - Deprecated old ``conv2d`` interface - -GPU: - - Added a meta-optimizer to select the fastest GPU implementations for convolutions - - Prevent GPU initialization when not required - - Added disk caching option for kernels - - Added method ``my_theano_function.sync_shared()`` to help synchronize GPU Theano functions - - Added useful stats for GPU in profile mode - - Added Cholesky op based on ``cusolver`` backend - - Added GPU ops based on `magma library `_: - SVD, matrix inverse, QR, cholesky and eigh - - Added ``GpuCublasTriangularSolve`` - - Added atomic addition and exchange for ``long long`` values in ``GpuAdvancedIncSubtensor1_dev20`` - - Support log gamma function for all non-complex types - - Support GPU SoftMax in both OpenCL and CUDA - - Support offset parameter ``k`` for ``GpuEye`` - - ``CrossentropyCategorical1Hot`` and its gradient are now lifted to GPU - - - cuDNN: - - - Official support for ``v6.*`` and ``v7.*`` - - Added spatial transformation operation based on cuDNN - - Updated and improved caching system for runtime-chosen cuDNN convolution algorithms - - Support cuDNN v7 tensor core operations for convolutions with runtime timed algorithms - - Better support and loading on Windows and Mac - - Support cuDNN v6 dilated convolutions - - Support cuDNN v6 reductions for contiguous inputs - - Optimized ``SUM(x^2)``, ``SUM(ABS(X))`` and ``MAX(ABS(X))`` operations with cuDNN reductions - - Added new Theano flags ``cuda.include_path``, ``dnn.base_path`` and ``dnn.bin_path`` - to help configure Theano when CUDA and cuDNN can not be found automatically - - Extended Theano flag ``dnn.enabled`` with new option ``no_check`` to help speed up cuDNN importation - - Disallowed ``float16`` precision for convolution gradients - - Fixed memory alignment detection - - Added profiling in C debug mode (with theano flag ``cmodule.debug=True``) - - Added Python scripts to help test cuDNN convolutions - - Automatic addition of cuDNN DLL path to ``PATH`` environment variable on Windows - - - Updated ``float16`` support - - - Added documentation for GPU float16 ops - - Support ``float16`` for ``GpuGemmBatch`` - - Started to use ``float32`` precision for computations that don't support ``float16`` on GPU - -New features: - - Implemented truncated normal distribution with box-muller transform - - Added ``L_op()`` overriding option for ``OpFromGraph`` - - Added NumPy C-API based fallback implementation for ``[sd]gemv_`` and ``[sd]dot_`` - - 
Implemented ``topk`` and ``argtopk`` on CPU and GPU - - Implemented ``max()`` and ``min()`` functions for booleans and unsigned integers types - - Added ``tensor6()`` and ``tensor7()`` in ``theano.tensor`` module - - Added boolean indexing for sub-tensors - - Added covariance matrix function ``theano.tensor.cov`` - - Added a wrapper for `Baidu's CTC `_ cost and gradient functions - - Added scalar and elemwise CPU ops for modified Bessel function of order 0 and 1 from ``scipy.special`` - - Added Scaled Exponential Linear Unit (SELU) activation - - Added sigmoid_binary_crossentropy function - - Added tri-gamma function - - Added ``unravel_index`` and ``ravel_multi_index`` functions on CPU - - Added modes ``half`` and ``full`` for ``Images2Neibs`` ops - - Implemented gradient for ``AbstractBatchNormTrainGrad`` - - Implemented gradient for matrix pseudoinverse op - - Added new prop `replace` for ``ChoiceFromUniform`` op - - Added new prop ``on_error`` for CPU ``Cholesky`` op - - Added new Theano flag ``deterministic`` to help control how Theano optimize certain ops that have deterministic versions. - Currently used for subtensor Ops only. - - Added new Theano flag ``cycle_detection`` to speed-up optimization step by reducing time spending in inplace optimizations - - Added new Theano flag ``check_stack_trace`` to help check the stack trace during optimization process - - Added new Theano flag ``cmodule.debug`` to allow a debug mode for Theano C code. Currently used for cuDNN convolutions only. - - Added new Theano flag ``pickle_test_value`` to help disable pickling test values - -Others: - - Kept stack trace for optimizations in new GPU backend - - Added deprecation warning for the softmax and logsoftmax vector case - - Added a warning to announce that C++ compiler will become mandatory in next Theano release ``0.11`` - - Added ``R_op()`` for ``ZeroGrad`` - - Added description for rnnblock - -Other more detailed changes: - - Fixed invalid casts and index overflows in ``theano.tensor.signal.pool`` - - Fixed gradient error for elemwise ``minimum`` and ``maximum`` when compared values are the same - - Fixed gradient for ``ARange`` - - Removed ``ViewOp`` subclass during optimization - - Removed useless warning when profile is manually disabled - - Added tests for abstract conv - - Added options for `disconnected_outputs` to Rop - - Removed ``theano/compat/six.py`` - - Removed ``COp.get_op_params()`` - - Support of list of strings for ``Op.c_support_code()``, to help not duplicate support codes - - Macro names provided for array properties are now standardized in both CPU and GPU C codes - - Moved all C code files into separate folder ``c_code`` in every Theano module - - Many improvements for Travis CI tests (with better splitting for faster testing) - - Many improvements for Jenkins CI tests: daily testings on Mac and Windows in addition to Linux - -Commiters since 0.9.0: - - Frederic Bastien - - Steven Bocco - - João Victor Tozatti Risso - - Arnaud Bergeron - - Mohammed Affan - - amrithasuresh - - Pascal Lamblin - - Reyhane Askari - - Alexander Matyasko - - Shawn Tan - - Simon Lefrancois - - Adam Becker - - Vikram - - Gijs van Tulder - - Faruk Ahmed - - Thomas George - - erakra - - Andrei Costinescu - - Boris Fomitchev - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - Tim Cooijmans - - Anirudh Goyal - - Saizheng Zhang - - Yikang Shen - - vipulraheja - - Florian Bordes - - Sina 
Honari - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Joseph Paul Cohen - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Dzmitry Bahdanau - - Ghislain Antony Vaillant - - Jan Schlüter - - Nan Jiang - - Xavier Bouthillier - - fo40225 - - mrTsjolder - - wyjw - - Aarni Koskela - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Rebecca N. Palmer - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - naitonium - - -Theano 1.0.0rc1 (30th of October, 2017) -======================================= - -This release contains new features, improvements and bug fixes to prepare the upcoming release. - -We recommend that every developer updates to this version. - -Highlights: - - Make sure MKL uses GNU OpenMP - - - **NB**: Matrix dot product (``gemm``) with ``mkl`` from conda - could return wrong results in some cases. We have reported the problem upstream - and we have a work around that raises an error with information about how to fix it. - - - Optimized ``SUM(x^2)``, ``SUM(ABS(X))`` and ``MAX(ABS(X))`` operations with cuDNN reductions - - Added Python scripts to help test cuDNN convolutions - - Fixed invalid casts and index overflows in ``theano.tensor.signal.pool`` - -A total of 71 people contributed to this release since 0.9.0, see list below. - -Commiters since 0.9.0: - - Frederic Bastien - - Steven Bocco - - João Victor Tozatti Risso - - Arnaud Bergeron - - Mohammed Affan - - amrithasuresh - - Pascal Lamblin - - Reyhane Askari - - Alexander Matyasko - - Shawn Tan - - Simon Lefrancois - - Adam Becker - - Vikram - - Gijs van Tulder - - Faruk Ahmed - - Thomas George - - erakra - - Andrei Costinescu - - Boris Fomitchev - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - Tim Cooijmans - - Anirudh Goyal - - Saizheng Zhang - - Yikang Shen - - vipulraheja - - Florian Bordes - - Sina Honari - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Joseph Paul Cohen - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Dzmitry Bahdanau - - Ghislain Antony Vaillant - - Jan Schlüter - - Nan Jiang - - Xavier Bouthillier - - fo40225 - - mrTsjolder - - wyjw - - Aarni Koskela - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Rebecca N. Palmer - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - naitonium - - -Theano 0.10.0beta4 (16th of October, 2017) -========================================== - -This release contains new features, improvements and bug fixes to prepare the upcoming release candidate. - -We recommend that every developer updates to this version. - -Highlights: - - Announcing that `MILA will stop developing Theano `_ - - Bug fixes, crash fixes, warning improvements and documentation updates - -A total of 70 people contributed to this release since 0.9.0, see list below. 
- -Interface changes: - - Generalized ``AllocDiag`` for any non-scalar input - -Convolution updates: - - Implemented fractional bilinear upsampling - -cuDNN (GPU): - - Disallowed ``float16`` precision for convolution gradients - - Fixed memory alignment detection - - Added profiling in C debug mode (with theano flag ``cmodule.debug=True``) - -New features: - - Implemented truncated normal distribution with box-muller transform - - Added ``L_op()`` overriding option for ``OpFromGraph`` - - Added NumPy C-API based fallback implementation for ``[sd]gemv_`` and ``[sd]dot_`` - -Other more detailed changes: - - Improved stack trace follow-up for GPU optimizations - - Fixed gradient error for elemwise ``minimum`` and ``maximum`` when compared values are the same - - Fixed gradient for ``ARange`` - - Removed ``ViewOp`` subclass during optimization - -Commiters since 0.9.0: - - Frederic Bastien - - João Victor Tozatti Risso - - Arnaud Bergeron - - Steven Bocco - - Mohammed Affan - - amrithasuresh - - Pascal Lamblin - - Reyhane Askari - - Alexander Matyasko - - Shawn Tan - - Simon Lefrancois - - Adam Becker - - Vikram - - Gijs van Tulder - - Faruk Ahmed - - Thomas George - - erakra - - Andrei Costinescu - - Boris Fomitchev - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - Tim Cooijmans - - Anirudh Goyal - - Saizheng Zhang - - Yikang Shen - - vipulraheja - - Florian Bordes - - Sina Honari - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Joseph Paul Cohen - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Dzmitry Bahdanau - - Ghislain Antony Vaillant - - Jan Schlüter - - Nan Jiang - - Xavier Bouthillier - - fo40225 - - mrTsjolder - - wyjw - - Aarni Koskela - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - naitonium - - -Theano 0.10.0beta3 (20th of September, 2017) -============================================ - -This release contains new features, improvements and bug fixes to prepare the upcoming release candidate. - -We recommend that every developer updates to this version. - -Highlights: - - conda packages now available and updated in our own conda channel ``mila-udem``. - To install it: ``conda install -c mila-udem -c mila-udem/label/pre theano pygpu`` - - - Improved elemwise operations - - - Speed-up elemwise ops based on SciPy - - Fixed memory leak related to elemwise ops on GPU - - - Fixed pygpu detection - - Bug fixes, crash fixes, warning improvements and documentation updates - -A total of 69 people contributed to this release since 0.9.0, see list below. 
- -Interface changes: - - Removed op ``ExtractDiag`` from ``theano.tensor.nlinalg``, now only in ``theano.tensor.basic`` - -Convolution updates: - - Added dilated causal convolutions for 2D - -New features: - - Implemented ``topk`` and ``argtopk`` on CPU and GPU - - Added ``unravel_index`` and ``ravel_multi_index`` functions on CPU - - Implemented ``max()`` and ``min()`` functions for booleans and unsigned integers types - -Others: - - Added ``R_op()`` for ``ZeroGrad`` - - Added description for rnnblock - -Commiters since 0.9.0: - - Frederic Bastien - - João Victor Tozatti Risso - - Arnaud Bergeron - - Steven Bocco - - Mohammed Affan - - amrithasuresh - - Pascal Lamblin - - Reyhane Askari - - Alexander Matyasko - - Simon Lefrancois - - Adam Becker - - Shawn Tan - - Vikram - - Gijs van Tulder - - Thomas George - - Andrei Costinescu - - Faruk Ahmed - - Boris Fomitchev - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - Tim Cooijmans - - Anirudh Goyal - - Saizheng Zhang - - Yikang Shen - - vipulraheja - - Florian Bordes - - Sina Honari - - erakra - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Joseph Paul Cohen - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Dzmitry Bahdanau - - Ghislain Antony Vaillant - - Jan Schlüter - - Nan Jiang - - Xavier Bouthillier - - fo40225 - - wyjw - - Aarni Koskela - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - naitonium - - -Theano 0.10.0beta2 (7th of September, 2017) -=========================================== - -This release contains new features, improvements and bug fixes to prepare the upcoming release candidate. - -We recommend that every developer updates to this version. - -Highlights: - - Support NumPy ``1.13`` - - Support pygpu ``0.7`` - - Added conda recipe - - Optional faster optimization step with new destroy handler - - Added documentation for RNNBlock - - Bug fixes, crash fixes, warning improvements and documentation updates - -A total of 67 people contributed to this release since 0.9.0, see list below. 
- -Interface changes: - - Added new parameter ``target`` for MRG functions - -Convolution updates: - - Added unshared convolutions - - Added 3D separable convolutions - - Added 3D grouped convolutions - - Removed old ``conv3d`` interface - - Deprecated old ``conv2d`` interface - - Updated ``conv`` documentation - -GPU: - - Added a meta-optimizer to select the fastest GPU implementations for convolutions - - - cuDNN: - - - Official support for ``v6.*`` and ``v7.*``, support for ``v5.*`` will be removed in next release - - Added spatial transformation operation based on cuDNN - - Updated and improved caching system for runtime-chosen cuDNN convolution algorithms - - Support cuDNN v7 tensor core operations for convolutions with runtime timed algorithms - - Restricted cuDNN reductions to contiguous inputs - - Automatic addition of cuDNN DLL path to ``PATH`` environment variable on Windows - -New features: - - Added ``tensor6()`` and ``tensor7()`` in ``theano.tensor`` module - - Added boolean indexing for sub-tensors - - Added covariance matrix function ``theano.tensor.cov`` - - Added new Theano flag ``pickle_test_value`` to help disable pickling test values - -Others: - - Kept stack trace for optimizations in new GPU backend - -Other more detailed changes: - - Moved all C code files into separate folder ``c_code`` in every Theano module - - Improvements for Jenkins tests - -Commiters since 0.9.0: - - Frederic Bastien - - João Victor Tozatti Risso - - Arnaud Bergeron - - Steven Bocco - - Mohammed Affan - - amrithasuresh - - Pascal Lamblin - - Reyhane Askari - - Alexander Matyasko - - Simon Lefrancois - - Shawn Tan - - Gijs van Tulder - - Thomas George - - Vikram - - Andrei Costinescu - - Faruk Ahmed - - Boris Fomitchev - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - Tim Cooijmans - - Anirudh Goyal - - Saizheng Zhang - - vipulraheja - - Florian Bordes - - Sina Honari - - Yikang Shen - - erakra - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Joseph Paul Cohen - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Dzmitry Bahdanau - - Ghislain Antony Vaillant - - Jan Schlüter - - Xavier Bouthillier - - fo40225 - - Aarni Koskela - - Adam Becker - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - wyjw - - -Theano 0.10.0beta1 (9th of August, 2017) -======================================== - -This release contains a lot of bug fixes, improvements and new features to prepare the upcoming release candidate. - -We recommend that every developer updates to this version. - -Highlights: - - Moved Python 3.* minimum supported version from 3.3 to 3.4 - - Replaced deprecated package ``nose-parameterized`` with up-to-date package ``parameterized`` for Theano requirements - - Theano now internally uses ``sha256`` instead of ``md5`` to work on systems that forbide ``md5`` for security reason - - Removed old GPU backend ``theano.sandbox.cuda``. 
New backend ``theano.gpuarray`` is now the official GPU backend - - Support more debuggers for ``PdbBreakpoint`` - - - Scan improvements - - - Speed up Theano scan compilation and gradient computation - - Added meaningful message when missing inputs to scan - - - Speed up graph toposort algorithm - - Faster C compilation by massively using a new interface for op params - - Faster optimization step - - Documentation updated and more complete - - Many bug fixes, crash fixes and warning improvements - -A total of 65 people contributed to this release since 0.9.0, see list below. - -Interface changes: - - Merged duplicated diagonal functions into two ops: ``ExtractDiag`` (extract a diagonal to a vector), - and ``AllocDiag`` (set a vector as a diagonal of an empty array) - - Renamed ``MultinomialWOReplacementFromUniform`` to ``ChoiceFromUniform`` - - - Removed or deprecated Theano flags: - - - ``cublas.lib`` - - ``cuda.enabled`` - - ``enable_initial_driver_test`` - - ``gpuarray.sync`` - - ``home`` - - ``lib.cnmem`` - - ``nvcc.*`` flags - - ``pycuda.init`` - - - Changed ``grad()`` method to ``L_op()`` in ops that need the outputs to compute gradient - -Convolution updates: - - Extended Theano flag ``dnn.enabled`` with new option ``no_check`` to help speed up cuDNN importation - - Implemented separable convolutions - - Implemented grouped convolutions - -GPU: - - Prevent GPU initialization when not required - - Added disk caching option for kernels - - Added method ``my_theano_function.sync_shared()`` to help synchronize GPU Theano functions - - Added useful stats for GPU in profile mode - - Added Cholesky op based on ``cusolver`` backend - - Added GPU ops based on `magma library `_: - SVD, matrix inverse, QR, cholesky and eigh - - Added ``GpuCublasTriangularSolve`` - - Added atomic addition and exchange for ``long long`` values in ``GpuAdvancedIncSubtensor1_dev20`` - - Support log gamma function for all non-complex types - - Support GPU SoftMax in both OpenCL and CUDA - - Support offset parameter ``k`` for ``GpuEye`` - - ``CrossentropyCategorical1Hot`` and its gradient are now lifted to GPU - - - Better cuDNN support - - - Official support for ``v5.*`` and ``v6.*`` - - Better support and loading on Windows and Mac - - Support cuDNN v6 dilated convolutions - - Support cuDNN v6 reductions - - Added new Theano flags ``cuda.include_path``, ``dnn.base_path`` and ``dnn.bin_path`` - to help configure Theano when CUDA and cuDNN can not be found automatically. - - - Updated ``float16`` support - - - Added documentation for GPU float16 ops - - Support ``float16`` for ``GpuGemmBatch`` - - Started to use ``float32`` precision for computations that don't support ``float16`` on GPU - -New features: - - Added a wrapper for `Baidu's CTC `_ cost and gradient functions - - Added scalar and elemwise CPU ops for modified Bessel function of order 0 and 1 from ``scipy.special``. - - Added Scaled Exponential Linear Unit (SELU) activation - - Added sigmoid_binary_crossentropy function - - Added tri-gamma function - - Added modes ``half`` and ``full`` for ``Images2Neibs`` ops - - Implemented gradient for ``AbstractBatchNormTrainGrad`` - - Implemented gradient for matrix pseudoinverse op - - Added new prop `replace` for ``ChoiceFromUniform`` op - - Added new prop ``on_error`` for CPU ``Cholesky`` op - - Added new Theano flag ``deterministic`` to help control how Theano optimize certain ops that have deterministic versions. - Currently used for subtensor Ops only. 
- - Added new Theano flag ``cycle_detection`` to speed-up optimization step by reducing time spending in inplace optimizations - - Added new Theano flag ``check_stack_trace`` to help check the stack trace during optimization process - - Added new Theano flag ``cmodule.debug`` to allow a debug mode for Theano C code. Currently used for cuDNN convolutions only. - -Others: - - Added deprecation warning for the softmax and logsoftmax vector case - - Added a warning to announce that C++ compiler will become mandatory in next Theano release ``0.11`` - -Other more detailed changes: - - Removed useless warning when profile is manually disabled - - Added tests for abstract conv - - Added options for `disconnected_outputs` to Rop - - Removed ``theano/compat/six.py`` - - Removed ``COp.get_op_params()`` - - Support of list of strings for ``Op.c_support_code()``, to help not duplicate support codes - - Macro names provided for array properties are now standardized in both CPU and GPU C codes - - Started to move C code files into separate folder ``c_code`` in every Theano module - - Many improvements for Travis CI tests (with better splitting for faster testing) - - Many improvements for Jenkins CI tests: daily testings on Mac and Windows in addition to Linux - -Commiters since 0.9.0: - - Frederic Bastien - - Arnaud Bergeron - - amrithasuresh - - João Victor Tozatti Risso - - Steven Bocco - - Pascal Lamblin - - Mohammed Affan - - Reyhane Askari - - Alexander Matyasko - - Simon Lefrancois - - Shawn Tan - - Thomas George - - Faruk Ahmed - - Zhouhan LIN - - Aleksandar Botev - - jhelie - - xiaoqie - - Tegan Maharaj - - Matt Graham - - Cesar Laurent - - Gabe Schwartz - - Juan Camilo Gamboa Higuera - - AndroidCloud - - Saizheng Zhang - - vipulraheja - - Florian Bordes - - Sina Honari - - Vikram - - erakra - - Chiheb Trabelsi - - Shubh Vachher - - Daren Eiri - - Gijs van Tulder - - Laurent Dinh - - Mohamed Ishmael Diwan Belghazi - - mila - - Jeff Donahue - - Ramana Subramanyam - - Bogdan Budescu - - Ghislain Antony Vaillant - - Jan Schlüter - - Xavier Bouthillier - - fo40225 - - Aarni Koskela - - Adam Becker - - Adam Geitgey - - Adrian Keet - - Adrian Seyboldt - - Andrei Costinescu - - Anmol Sahoo - - Chong Wu - - Holger Kohr - - Jayanth Koushik - - Jenkins - - Lilian Besson - - Lv Tao - - Michael Manukyan - - Murugesh Marvel - - NALEPA - - Ubuntu - - Zotov Yuriy - - dareneiri - - lrast - - morrme - - yikang - - -Theano 0.9.0 (20th of March, 2017) -================================== - -This is a final release of Theano, version ``0.9.0``, with a lot of -new features, interface changes, improvements and bug fixes. - -We recommend that everybody update to this version. 
- -Highlights (since 0.8.0): - - Better Python 3.5 support - - Better numpy 1.12 support - - Conda packages for Mac, Linux and Windows - - Support newer Mac and Windows versions - - More Windows integration: - - - Theano scripts (``theano-cache`` and ``theano-nose``) now works on Windows - - Better support for Windows end-lines into C codes - - Support for space in paths on Windows - - - Scan improvements: - - - More scan optimizations, with faster compilation and gradient computation - - Support for checkpoint in scan (trade off between speed and memory usage, useful for long sequences) - - Fixed broadcast checking in scan - - - Graphs improvements: - - - More numerical stability by default for some graphs - - Better handling of corner cases for theano functions and graph optimizations - - More graph optimizations with faster compilation and execution - - smaller and more readable graph - - - New GPU back-end: - - - Removed warp-synchronous programming to get good results with newer CUDA drivers - - More pooling support on GPU when cuDNN isn't available - - Full support of ignore_border option for pooling - - Inplace storage for shared variables - - float16 storage - - Using PCI bus ID of graphic cards for a better mapping between theano device number and nvidia-smi number - - Fixed offset error in ``GpuIncSubtensor`` - - - Less C code compilation - - Added support for bool dtype - - Updated and more complete documentation - - Bug fixes related to merge optimizer and shape inference - - Lot of other bug fixes, crashes fixes and warning improvements - -A total of 123 people contributed to this release since 0.8.0, see list below. - -Interface changes: - - Merged ``CumsumOp/CumprodOp`` into ``CumOp`` - - In MRG module: - - - Replaced method ``multinomial_wo_replacement()`` with new method ``choice()`` - - Random generator now tries to infer the broadcast pattern of its output - - - New pooling interface - - Pooling parameters can change at run time - - Moved ``softsign`` out of sandbox to ``theano.tensor.nnet.softsign`` - - Using floatX dtype when converting empty list/tuple - - ``Roll`` make the shift be modulo the size of the axis we roll on - - ``round()`` default to the same as NumPy: half_to_even - -Convolution updates: - - Support of full and half modes for 2D and 3D convolutions including in ``conv3d2d`` - - Allowed pooling of empty batch - - Implement ``conv2d_transpose`` convenience function - - Multi-cores convolution and pooling on CPU - - New abstract 3d convolution interface similar to the 2d convolution interface - - Dilated convolution - - -GPU: - - cuDNN: support versoin 5.1 and wrap batch normalization (2d and 3d) and RNN functions - - Multiple-GPU, synchrone update (via platoon, use NCCL) - - Gemv(matrix-vector product) speed up for special shape - - cublas gemv workaround when we reduce on an axis with a dimensions size of 0 - - Warn user that some cuDNN algorithms may produce unexpected results in certain environments - for convolution backward filter operations - - ``GPUMultinomialFromUniform`` op now supports multiple dtypes - - Support for ``MaxAndArgMax`` for some axis combination - - Support for solve (using cusolver), erfinv and erfcinv - - Implemented ``GpuAdvancedSubtensor`` - -New features: - - ``OpFromGraph`` now allows gradient overriding for every input - - Added Abstract Ops for batch normalization that use cuDNN when available and pure Theano CPU/GPU alternatives otherwise - - Added gradient of solve, tensorinv (CPU), tensorsolve (CPU), searchsorted (CPU), 
DownsampleFactorMaxGradGrad (CPU) - - Added Multinomial Without Replacement - - Allowed partial evaluation of compiled function - - More Rop support - - Indexing support ellipsis: ``a[..., 3]```, ``a[1,...,3]`` - - Added ``theano.tensor.{tensor5,dtensor5, ...}`` - - compiledir_format support device - - Added New Theano flag ``conv.assert_shape`` to check user-provided shapes at runtime (for debugging) - - Added new Theano flag ``cmodule.age_thresh_use`` - - Added new Theano flag ``cuda.enabled`` - - Added new Theano flag ``nvcc.cudafe`` to enable faster compilation and import with old CUDA back-end - - Added new Theano flag ``print_global_stats`` to print some global statistics (time spent) at the end - - Added new Theano flag ``profiling.ignore_first_call``, useful to profile the new gpu back-end - - remove ProfileMode (use Theano flag ``profile=True`` instead) - - -Others: - - Split op now has C code for CPU and GPU - - ``theano-cache list`` now includes compilation times - - Speed up argmax only on GPU (without also needing the max) - - More stack trace in error messages - - Speed up cholesky grad - - ``log(sum(exp(...)))`` now get stability optimized - - -Other more detailed changes: - - Added Jenkins (gpu tests run on pull requests in addition to daily buildbot) - - Removed old benchmark directory and other old files not used anymore - - Use of 64-bit indexing in sparse ops to allow matrix with more then 2\ :sup:`31`\ -1 elements - - Allowed more then one output to be an destructive inplace - - More support of negative axis - - Added the keepdims parameter to the norm function - - Make scan gradient more deterministic - -Commiters since 0.8.0: - - Frederic Bastien - - Arnaud Bergeron - - Pascal Lamblin - - Steven Bocco - - Ramana Subramanyam - - Simon Lefrancois - - Gijs van Tulder - - Benjamin Scellier - - khaotik - - Chiheb Trabelsi - - Chinnadhurai Sankar - - Cesar Laurent - - Reyhane Askari - - Mohammad Pezeshki - - Alexander Matyasko - - Alexandre de Brebisson - - Mathieu Germain - - Nan Rosemary Ke - - Pierre Luc Carrier - - Olivier Mastropietro - - Thomas George - - Saizheng Zhang - - Iulian Vlad Serban - - Francesco Visin - - Caglar - - Faruk Ahmed - - Harm de Vries - - Samira Shabanian - - Vincent Dumoulin - - Nicolas Ballas - - Jakub Sygnowski - - Jan Schlüter - - Samira Ebrahimi Kahou - - Mikhail Korobov - - Fei Wang - - Kv Manohar - - Jesse Livezey - - Kelvin Xu - - Matt Graham - - Ruslana Makovetsky - - Sina Honari - - Bryn Keller - - Ciyong Chen - - Vitaliy Kurlin - - Zhouhan LIN - - Gokula Krishnan - - Kumar Krishna Agrawal - - Ozan Çağlayan - - Vincent Michalski - - affanv14 - - Amjad Almahairi - - Ray Donnelly - - Tim Cooijmans - - happygds - - mockingjamie - - Christos Tsirigotis - - Florian Bordes - - Ilya Kulikov - - RadhikaG - - Taesup (TS) Kim - - Ying Zhang - - Anton Chechetka - - Karthik Karanth - - Kirill Bobyrev - - Rebecca N. Palmer - - Yang Zhang - - Yaroslav Ganin - - Jonas Degrave - - Liwei Cai - - Lucas Beyer - - Michael Harradon - - Morgan Stuart - - Tim Gasper - - Xavier Bouthillier - - p - - texot - - Andrés Gottlieb - - Ben Poole - - Bhavishya Pohani - - Carl Thomé - - David Bau - - Dimitar Dimitrov - - Evelyn Mitchell - - Fei Zhan - - Fuchai - - Fábio Perez - - Gennadiy Tupitsin - - Gilles Louppe - - Greg Ciccarelli - - He - - Huan Zhang - - Kaixhin - - Kevin Keraudren - - Maltimore - - Marc-Alexandre Cote - - Marco - - Marius F. 
Killinger - - Martin Drawitsch - - Maxim Kochurov - - Micah Bojrab - - Neil - - Nizar Assaf - - Rithesh Kumar - - Rizky Luthfianto - - Robin Millette - - Roman Ring - - Sander Dieleman - - Sebastin Santy - - Shawn Tan - - Wazeer Zulfikar - - Wojciech Głogowski - - Yann N. Dauphin - - gw0 [http://gw.tnode.com/] - - hexahedria - - hsintone - - jakirkham - - joncrall - - root - - superantichrist - - tillahoffmann - - valtron - - wazeerzulfikar - - you-n-g - - -Theano 0.9.0rc4 (13th of March, 2017) -===================================== - -This release extends the 0.9.0rc3 and announces the upcoming final release 0.9. - -Highlights (since 0.9.0rc3): - - Documentation updates - - DebugMode fixes, cache cleanup fixes and other small fixes - - - New GPU back-end: - - - Fixed offset error in GpuIncSubtensor - - Fixed indexing error in GpuAdvancedSubtensor for more than 2 dimensions - -A total of 5 people contributed to this release since 0.9.0rc3 and 123 since 0.8.0, see the lists below. - - -Committers since 0.9.0rc3: - - Frederic Bastien - - Pascal Lamblin - - Arnaud Bergeron - - Cesar Laurent - - Martin Drawitsch - - -Theano 0.9.0rc3 (6th of March, 2017) -==================================== - -This release extends the 0.9.0rc2 and announces the upcoming final release 0.9. - -Highlights (since 0.9.0rc2): - - Graph clean up and faster compilation - - New Theano flag conv.assert_shape to check user-provided shapes at runtime (for debugging) - - Fix overflow in pooling - - Warn if taking softmax over broadcastable dimension - - Removed old files not used anymore - - Test fixes and crash fixes - - - New GPU back-end: - - - Removed warp-synchronous programming, to get good results with newer CUDA drivers - -A total of 5 people contributed to this release since 0.9.0rc2 and 122 since 0.8.0, see the lists below. - - -Committers since 0.9.0rc2: - - Frederic Bastien - - Arnaud Bergeron - - Pascal Lamblin - - Florian Bordes - - Jan Schlüter - - -Theano 0.9.0rc2 (27th of February, 2017) -======================================== - -This release extends the 0.9.0rc1 and announces the upcoming final release 0.9. - -Highlights (since 0.9.0rc1): - - Fixed dnn conv grad issues - - Allowed pooling of empty batch - - Use of 64-bit indexing in sparse ops to allow matrix with more then 2\ :sup:`31`\ -1 elements. - - Removed old benchmark directory - - Crash fixes, bug fixes, warnings improvements, and documentation update - -A total of 9 people contributed to this release since 0.9.0rc1 and 121 since 0.8.0, see the lists below. - - -Committers since 0.9.0rc1: - - Frederic Bastien - - Pascal Lamblin - - Steven Bocco - - Simon Lefrancois - - Lucas Beyer - - Michael Harradon - - Rebecca N. Palmer - - David Bau - - Micah Bojrab - - -Theano 0.9.0rc1 (20th of February, 2017) -======================================== - -This release extends the 0.9.0beta1 and announces the upcoming final release 0.9. - -Highlights (since 0.9.0beta1): - - Better integration of Theano+libgpuarray packages into conda distribution - - Better handling of Windows end-lines into C codes - - Better compatibility with NumPy 1.12 - - Faster scan optimizations - - Fixed broadcast checking in scan - - Bug fixes related to merge optimizer and shape inference - - many other bug fixes and improvements - - Updated documentation - - - New GPU back-end: - - - Value of a shared variable is now set inplace - -A total of 26 people contributed to this release since 0.9.0beta1 and 117 since 0.8.0, see the list at the bottom. 
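The in-place shared-variable update mentioned just above goes through the ordinary shared-variable API. A minimal sketch, assuming only the standard ``theano.shared`` / ``get_value`` / ``set_value`` interface::

    import numpy as np
    import theano

    # A shared variable owns its own storage; compiled functions can read and update it.
    state = theano.shared(np.zeros(3, dtype=theano.config.floatX), name="state")

    print(state.get_value())                                  # [ 0.  0.  0.]
    # On the new back-end the container's value is overwritten in place
    # rather than replaced by a fresh allocation.
    state.set_value(np.ones(3, dtype=theano.config.floatX))
    print(state.get_value())                                  # [ 1.  1.  1.]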
- -Interface changes: - - In MRG, replaced method `multinomial_wo_replacement()` with new method `choice()` - -Convolution updates: - - Implement conv2d_transpose convenience function - -GPU: - - GPUMultinomialFromUniform op now supports multiple dtypes - -New features: - - OpFromGraph now allows gradient overriding for every input - - Added Abstract Ops for batch normalization that use cuDNN when available and pure Theano CPU/GPU alternatives otherwise - - Added new Theano flag cuda.enabled - - Added new Theano flag print_global_stats to print some global statistics (time spent) at the end - -Others: - - Split op now has C code for CPU and GPU - - "theano-cache list" now includes compilation times - - -Committers since 0.9.0beta1: - - Frederic Bastien - - Benjamin Scellier - - khaotik - - Steven Bocco - - Arnaud Bergeron - - Pascal Lamblin - - Gijs van Tulder - - Reyhane Askari - - Chinnadhurai Sankar - - Vincent Dumoulin - - Alexander Matyasko - - Cesar Laurent - - Nicolas Ballas - - affanv14 - - Faruk Ahmed - - Anton Chechetka - - Alexandre de Brebisson - - Amjad Almahairi - - Dimitar Dimitrov - - Fuchai - - Jan Schlüter - - Jonas Degrave - - Mathieu Germain - - Rebecca N. Palmer - - Simon Lefrancois - - valtron - - -Theano 0.9.0beta1 (24th of January, 2017) -========================================= - -This release contains many bug fixes, improvements and new features, to prepare the upcoming release candidate. - -Highlights: - - Many computation and compilation speed-ups - - More numerical stability by default for some graphs - - Jenkins (GPU tests run on pull requests in addition to the daily buildbot) - - Better handling of corner cases for theano functions and graph optimizations - - More graph optimizations (faster execution and smaller, more readable graphs) - - Less C code compilation - - Better Python 3.5 support - - Better NumPy 1.12 support - - Support for newer Mac and Windows versions - - Conda packages for Mac, Linux and Windows - - Theano scripts now work on Windows - - Scan with checkpoint (trade-off between speed and memory usage, useful for long sequences; see the sketch below) - - Added a bool dtype - - - New GPU back-end: - - - float16 storage - - Better mapping between theano device number and nvidia-smi number, using the PCI bus ID of graphic cards - - More pooling support on GPU when cuDNN isn't available - - ignore_border=False is now implemented for pooling - - -A total of 111 people contributed to this release since 0.8.0, see the list at the bottom.
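The checkpointing highlight above builds on the ordinary ``theano.scan`` loop; here is a minimal sketch of plain ``scan`` over a (possibly long) sequence, with the checkpointed variant itself left out::

    import numpy as np
    import theano
    import theano.tensor as tt

    xs = tt.dvector("xs")                      # a possibly very long input sequence

    # Running sum over the sequence: step(x_t, acc_{t-1}) -> acc_t
    results, updates = theano.scan(
        fn=lambda x, acc: acc + x,
        sequences=xs,
        outputs_info=np.asarray(0.0, dtype=xs.dtype),
    )

    running_sum = theano.function([xs], results[-1], updates=updates)
    print(running_sum(np.arange(5.0)))         # 10.0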
- - -Interface changes: - - New pooling interface - - Pooling parameters can change at run time - - When converting empty list/tuple, now we use floatX dtype - - The MRG random generator now try to infer the broadcast pattern of its output - - Move softsign out of sandbox to theano.tensor.nnet.softsign - - Roll make the shift be modulo the size of the axis we roll on - - Merge CumsumOp/CumprodOp into CumOp - - round() default to the same as NumPy: half_to_even - -Convolution updates: - - Multi-cores convolution and pooling on CPU - - New abstract 3d convolution interface similar to the 2d convolution interface - - Dilated convolution - -GPU: - - cuDNN: support versoin 5.1 and wrap batch normalization (2d and 3d) and RNN functions - - Multiple-GPU, synchrone update (via platoon, use NCCL) - - GpuAdvancedSubtensor in new back-end - - Gemv(matrix-vector product) speed up for special shape - - Support for MaxAndArgMax for some axis combination - - Support for solve (using cusolver), erfinv and erfcinv - - cublas gemv workaround when we reduce on an axis with a dimensions size of 0 - - Warn user that some cuDNN algorithms may produce unexpected results in certain environments - for convolution backward filter operations - -New features: - - Add gradient of solve, tensorinv (CPU), tensorsolve (CPU) searchsorted (CPU) - - Add Multinomial Without Replacement - - conv3d2d support full and half mode (REMOVE?) - - Add DownsampleFactorMaxGradGrad.grad - - Allow partial evaluation of compiled function - - More Rop support - - Indexing support ellipsis: a[..., 3], a[1,...,3] - - Added theano.tensor.{tensor5,dtensor5, ...} - - compiledir_format support device - - Added new Theano flag cmodule.age_thresh_use - -Others: - - Speed up argmax only on gpu (without also needing the max) - - A few unfrequent bugfix - - More stack trace in error message - - Speed up cholesky grad - - log(sum(exp(...))) now get stability optimized - -Other more detailed changes: - - Allow more then one output to be an destructive inplace - - Add flag profiling.ignore_first_call, useful to profile the new gpu back-end - - Doc/error message fixes/updates - - More support of negative axis - - Added the keepdims parameter to the norm function - - Crash fixes - - Make scan gradient more deterministic - - Add support for space in path on Windows - - remove ProfileMode (use Theano flag profile=True instead) - - -Committers since 0.8.0: - - Frederic Bastien - - Arnaud Bergeron - - Pascal Lamblin - - Ramana Subramanyam - - Simon Lefrancois - - Steven Bocco - - Gijs van Tulder - - Cesar Laurent - - Chiheb Trabelsi - - Chinnadhurai Sankar - - Mohammad Pezeshki - - Reyhane Askari - - Alexander Matyasko - - Alexandre de Brebisson - - Nan Rosemary Ke - - Pierre Luc Carrier - - Mathieu Germain - - Olivier Mastropietro - - khaotik - - Saizheng Zhang - - Thomas George - - Iulian Vlad Serban - - Benjamin Scellier - - Francesco Visin - - Caglar - - Harm de Vries - - Samira Shabanian - - Jakub Sygnowski - - Samira Ebrahimi Kahou - - Mikhail Korobov - - Faruk Ahmed - - Fei Wang - - Jan Schlüter - - Kv Manohar - - Jesse Livezey - - Kelvin Xu - - Matt Graham - - Ruslana Makovetsky - - Sina Honari - - Bryn Keller - - Ciyong Chen - - Nicolas Ballas - - Vitaliy Kurlin - - Zhouhan LIN - - Gokula Krishnan - - Kumar Krishna Agrawal - - Ozan Çağlayan - - Vincent Michalski - - Ray Donnelly - - Tim Cooijmans - - Vincent Dumoulin - - happygds - - mockingjamie - - Amjad Almahairi - - Christos Tsirigotis - - Ilya Kulikov - - RadhikaG - - Taesup (TS) Kim - - Ying 
Zhang - - Karthik Karanth - - Kirill Bobyrev - - Yang Zhang - - Yaroslav Ganin - - Liwei Cai - - Morgan Stuart - - Tim Gasper - - Xavier Bouthillier - - p - - texot - - Andrés Gottlieb - - Ben Poole - - Bhavishya Pohani - - Carl Thomé - - Evelyn Mitchell - - Fei Zhan - - Fábio Perez - - Gennadiy Tupitsin - - Gilles Louppe - - Greg Ciccarelli - - He - - Huan Zhang - - Jonas Degrave - - Kaixhin - - Kevin Keraudren - - Maltimore - - Marc-Alexandre Cote - - Marco - - Marius F. Killinger - - Maxim Kochurov - - Neil - - Nizar Assaf - - Rithesh Kumar - - Rizky Luthfianto - - Robin Millette - - Roman Ring - - Sander Dieleman - - Sebastin Santy - - Shawn Tan - - Wazeer Zulfikar - - Wojciech Głogowski - - Yann N. Dauphin - - gw0 [http://gw.tnode.com/] - - hexahedria - - hsintone - - jakirkham - - joncrall - - root - - superantichrist - - tillahoffmann - - wazeerzulfikar - - you-n-g - - -Theano 0.8.2 (21th of April, 2016) -================================== - -This is a point release with only the support for cudnn v5 convolution -and minor fixes. - -Highlights: -- cuDNN v5 convolution support (cuDNN v3 isn't supported anymore) -- A few crash fixes - - -Theano 0.8.1 (29th of March, 2016) -================================== - -This is a point release without any new feature. - -It fixes compilation issues on MacOS X with the command line tools for -XCode 7.3, which was released shortly after Theano 0.8.0. - - -Theano 0.8 (21th of March, 2016) -================================ - -We recommend that everybody update to this version. - -Highlights: - - Python 2 and 3 support with the same code base - - Faster optimization - - Integration of cuDNN for better GPU performance - - Many Scan improvements (execution speed up, ...) - - optimizer=fast_compile moves computation to the GPU. - - Better convolution on CPU and GPU. (CorrMM, cudnn, 3d conv, more parameter) - - Interactive visualization of graphs with d3viz - - cnmem (better memory management on GPU) - - BreakpointOp - - Multi-GPU for data parallism via Platoon (https://github.com/mila-udem/platoon/) - - More pooling parameter supported - - Bilinear interpolation of images - - New GPU back-end: - - * Float16 new back-end (need cuda 7.5) - * Multi dtypes - * Multi-GPU support in the same process - - -A total of 141 people contributed to this release, see the list at the bottom. - - -Installation: - - Better BLAS detection - - Fixes for more recent software and OS versions - - Support Anaconda on Windows - -Bug fixes: - - GpuJoin now supports negative axis - - Fix GpuCumsum for negative axis - -Interface Deprecation (a warning is printed): - - Deprecate Param class, use In instead - -Interface Changes: - - Rename DownsampleFactorMax to Pool. - - tensor.stack now uses the same interface as numpy.stack - - optimizer=fast_compile moves computation to the GPU - - Raise the user stack trace more frequently. - - Change dev version numbering to follow the PEP 440 - - -New Interface (reuses existing functionality): - - theano.tensor.nnet.relu - - theano.tensor.nnet.elu - - BatchNormalization. 
- - MaxAndArgmax support axis=None - - Add theano.tensor.compress (equivalent of numpy.compress) - - theano.tensor.signal.downsamples.max_pool_2d_same_size - - COp - - __props__ - -New features - - tensor.unique - - map_variables - - erfcx - - mgrid, ogrid - - allclose - - BreakpointOp - - Make bincount work on GPU - - SolveOp on GPU - - Optional optimization remove_all_assert - - AllocEmpty - - LogSoftmax, for stability optimization when the crossentropy optimization does not apply. - - theano.tensor.repeat works on GPU - - BatchedDot on the GPU and faster on the CPU. - - Faster batched_tensordot and make it work on GPU. - - SoftmaxGrad grad - - 3d conv via CorrMM on the GPU - - CPU Max Pool support of padding and strides!=windows size - - theano.function() now accepts a dict for the outputs. When doing this, the function will return a dict. Helpful to keep track of which output is what. - - Warn for unknown or misspelled theano config variables - - theano.tensor.tile update (accept symbolic reps, work on GPU) - - scan how have a strict flag. If set to True, this make scan building faster and could make execution faster. - - theano.tensor.signal.conv2d(2d,2d) output 2d answer - - More convolution parameter supported - - Bilinear interpolation of images - - -Speed-ups: - - Faster SetSubtensor on the GPU. - - Support more reduction pattern on the GPU. - - More graph optimization - - Faster graph optimization - - GpuCrossentropySoftmaxArgmax1HotWithBias - - -Crash/no return fixes: - - Fix crash in the assert op grad - - Fix curand crash on Mac - - Multiple Fix scan crashes - - Finish to update all Op.grad() implementation to the new interface - -Others: - - Support ARM processor. - - Better tests - - Code clean up. - - Doc updates - - doctest and sphinx test in travis - - More tests tagged as slow - - Better same_shape implementation - - More op with c code to lower overhead - - Custom pickler for SharedVariable theano.misc.pkl_utils.{dump,load} - - function_dump to help us reproduce user error during compilation - - assert_no_cpu_op - - pep8, flake8 - - Better error messages - - On non-default modes, reduce the number of allocation when allow_gc=False - - Better lock - - -Committers for this dev version only: - - Frederic Bastien - - Arnaud Bergeron - - Pierre Luc Carrier - - Iban Harlouchet - - Pascal Lamblin - - Chienli Ma - - Tim Cooijmans - - Nicolas Ballas - - Amjad Almahairi - - David Warde-Farley - - Christof Angermueller - - Ziye Fan - - Caglar - - Sina Honari - - Roy Xue - - hantek - - Mohammad Pezeshki - - Melanie Ducoffe - - Alexandre de Brebisson - - Harm de Vries - - Samira Shabanian - - Alex Lamb - - Ramana.S - - Francesco Visin - - Saizheng Zhang - - Ying Zhang - - Jan Schlüter - - Xavier Bouthillier - - Bart van Merrienboer - - Cesar Laurent - - Iulian Vlad Serban - - Li Yao - - Sigurd Spieckermann - - Dmitrii Serdiuk - - Kelvin Xu - - Sebastien Jean - - Thomas Mesnard - - Seon-Wook Park - - Vincent Michalski - - Dustin Webb - - Mikhail Korobov - - Orhan Firat - - Olivier Mastropietro - - Daniel Renshaw - - Julien Rebetez - - Peng Liu - - Sean Lee - - TimSalimans - - Andre Holzner - - Gijs van Tulder - - Guillaume Alain - - Julien Demouth - - Markus Beissinger - - Mehdi Mirza - - Moslem Kazemi - - Saxenauts - - Søren Kaae Sønderby - - sentient07 - - Anatoly Belikov - - Diogo Moitinho de Almeida - - Jakub Sygnowski - - Kashif Rasul - - Laurent Dinh - - Rémy Léone - - Taesup (TS) Kim - - gw0 [http://gw.tnode.com/] - - mronian - - vesis84 - - Benni - - Chiheb Trabelsi - - 
JesseLivezey - - Marius Killinger - - Matt Graham - - Matthew Willson - - Piotr Frankowski - - Stefan Krastanov - - vdumoulin - - Adithya Ganesh - - Anish Shah - - Balázs Hidasi - - Colin Raffel - - Cory Lorenz - - Doug - - Jesse Livezey - - John Salvatier - - John Zedlewski - - Jonathan Ho - - Kaixhin - - Liang-Chi Hsieh - - Lucas Beyer - - Luke Metz - - Marc-Alexandre Cote - - Martin Arjovsky - - Matthias Kümmerer - - Sirisha Rambhatla - - briancheung - - cai-lw - - ivdorelian - - jan-matthis - - jojolalpin - - joncrall - - peterjsadowski - - scottsievert - - Étienne Simon - - A. Flaxman - - AlOa - - Albert Zeyer - - Andrea - - Andy Jiang - - Balázs - - Ben Poole - - Brian Cheung - - Christophe Van Gysel - - Claude Coulombe - - Clay McLeod - - Dario Garcia - - Jakob Lombacher - - Joao Felipe Santos - - John Arevalo - - Jonas Degrave - - Martin Thoma - - Mathieu Germain - - Matthew Koichi Grimes - - Michael Eickenberg - - Michael Opitz - - Paul Hollensen - - Prayag Verma - - Saatvik Shah - - Sergei Lebedev - - Vik Kamath - - Wei Ouyang - - Wojciech Głogowski - - Yi-Lin Juang - - Yurii Shevchuk - - Zach Dwiel - - dan - - eulerreich - - jotterbach - - rolf - - theaverageguy - - wuaalb - - -Theano 0.7 (26th of March, 2015) -================================ -We recommand to everyone to upgrade to this version. - -Highlights: - * Integration of cuDNN for 2D convolutions and pooling on supported GPUs - * Too many optimizations and new features to count - * Various fixes and improvements to scan - * Better support for GPU on Windows - * On Mac OS X, clang is used by default - * Many crash fixes - * Some bug fixes as well - - -Theano 0.6 (December 3th, 2013) -=================================== - -We recommend that everybody update to this version. - - -Highlights (since 0.6rc5): - * Last release with support for Python 2.4 and 2.5. - * We will try to release more frequently. - * Fix crash/installation problems. - * Use less memory for conv3d2d. - -0.6rc4 skipped for a technical reason. - -Highlights (since 0.6rc3): - * Python 3.3 compatibility with buildbot test for it. - * Full advanced indexing support. - * Better Windows 64 bit support. - * New profiler. - * Better error messages that help debugging. - * Better support for newer NumPy versions (remove useless warning/crash). - * Faster optimization/compilation for big graph. - * Move in Theano the Conv3d2d implementation. - * Better SymPy/Theano bridge: Make an Theano op from SymPy expression and use SymPy c code generator. - * Bug fixes. - -Change from 0.6rc5: - * Fix crash when specifing march in cxxflags Theano flag. (Frederic B., reported by FiReTiTi) - * code cleanup (Jorg Bornschein) - * Fix Canopy installation on windows when it was installed for all users: Raingo - * Fix Theano tests due to a scipy change. (Frederic B.) - * Work around bug introduced in scipy dev 0.14. (Frederic B.) - * Fix Theano tests following bugfix in SciPy. (Frederic B., reported by Ziyuan Lin) - * Add Theano flag cublas.lib (Misha Denil) - * Make conv3d2d work more inplace (so less memory usage) (Frederic B., repoted by Jean-Philippe Ouellet) - - -Committers since 0.5: - -Frederic Bastien -Pascal Lamblin -Ian Goodfellow -Olivier Delalleau -Razvan Pascanu -abalkin -Arnaud Bergeron -Nicolas Bouchard + -Jeremiah Lowin + -Matthew Rocklin -Eric Larsen + -James Bergstra -David Warde-Farley -John Salvatier + -Vivek Kulkarni + -Yann N. 
Dauphin -Ludwig Schmidt-Hackenberg + -Gabe Schwartz + -Rami Al-Rfou' + -Guillaume Desjardins -Caglar + -Sigurd Spieckermann + -Steven Pigeon + -Bogdan Budescu + -Jey Kottalam + -Mehdi Mirza + -Alexander Belopolsky + -Ethan Buchman + -Jason Yosinski -Nicolas Pinto + -Sina Honari + -Ben McCann + -Graham Taylor -Hani Almousli -Ilya Dyachenko + -Jan Schlüter + -Jorg Bornschein + -Micky Latowicki + -Yaroslav Halchenko + -Eric Hunsberger + -Amir Elaguizy + -Hannes Schulz + -Huy Nguyen + -Ilan Schnell + -Li Yao -Misha Denil + -Robert Kern + -Sebastian Berg + -Vincent Dumoulin + -Wei Li + -XterNalz + - - -A total of 51 people contributed to this release. -People with a "+" by their names contributed a patch for the first time. - - -Theano 0.6rc5 (November 25th, 2013) -=================================== - -We recommend that everybody update to this version. - -We plan to release 0.6 in one week if there is no problem introduced -with this release candidate. - -Theano 0.6rc4 was skipped due to a problem with pypi - -Highlights: - * Python 3.3 compatibility with buildbot test for it. - * Full advanced indexing support. - * Better Windows 64 bit support. - * New profiler. - * Better error messages that help debugging. - * Better support for newer NumPy versions (remove useless warning/crash). - * Faster optimization/compilation for big graph. - * Move in Theano the Conv3d2d implementation. - * Better SymPy/Theano bridge: Make an Theano op from SymPy expression and use SymPy c code generator. - * Bug fixes. - -Committers for this rc5 only: - -Frederic Bastien -Pascal Lamblin -Arnaud Bergeron -abalkin -Olivier Delalleau -John Salvatier -Razvan Pascanu -Jeremiah Lowin -Ludwig Schmidt-Hackenberg + -Vivek Kulkarni -Matthew Rocklin -Gabe Schwartz -James Bergstra -Sigurd Spieckermann + -Bogdan Budescu + -Mehdi Mirza + -Nicolas Bouchard -Ethan Buchman + -Guillaume Desjardins -Ian Goodfellow -Jason Yosinski -Sina Honari + -Ben McCann + -David Warde-Farley -Ilya Dyachenko + -Jan Schluter + -Micky Latowicki + -Yaroslav Halchenko + -Alexander Belopolsky -Hannes Schulz + -Huy Nguyen + -Robert Kern + -Sebastian Berg + -Vincent Dumoulin + -Wei Li + -XterNalz + - - -A total of 36 people contributed to this release. -People with a "+" by their names contributed a patch for the first time. - -Installation: - * Canopy support (direct link to MKL): - * On Linux and Mac OSX (Frederic B., Robert Kern) - * On Windows (Edward Shi, Frederic B.) - - * Anaconda instructions (Pascal L., Frederic B.) - * Doc Ubuntu 13.04 (Frederic B.) - * Better support of newer NumPy version(remove useless warning/crash) (Frederic B., Huy Nguyen) - -Bug fixes: - * Scan: if a scan node was cloned (by theano.clone) with different inputs, and if both the initial and the cloned nodes are used in the function being compiled, the value of the outputs of one would be replaced with the outputs of the other one. (Pascal L.) - * Sparse: Disable the optimization that introduce the CSMGradC op as it doesn't work correctly with unsorted indices. (Frederic B.) - * Mac: Fix wrong result of GpuDownsampleFactorMaxGrad on Mac OSX. (Pascal L.) - * Mac: Auto-Detect and work around a bug in BLAS on MacOS X (Pascal L.) - * Mac: Work around bug in MacOS X. If 2 compiled modules had the same name, the OS or Python was not always the right one even when we used the right handle to it. (Pascal L.) - Use this hash in the Python module, and in %(nodename)s, so that different helper functions in the support code for different Ops will always have different names. 
- * Sparse grad: Fix ConstructSparseFromList.infer_shape (Pascal L., reported by Rami Al-Rfou') - * (introduced in the development version after 0.6rc3 release) (Frederic B.) - Reduction that upcasts the input on no axis (ex: call theano.sum() on a scalar when the original dtype isn't float64 or - [u]int64). It produced bad results as we did not upcasted the inputs in the code, we just copy them. - * Fix some cases of theano.clone() when we get a replacement of x that is a function of x. (Razvan P., reported by Akio Takano) - * Fix grad of Alloc when we unbroadcast the value and it isn't a scalar. (Frederic B., reported Ian G.) - - * In some cases (I think most cases), there was an exception raised in the theano.tensor.grad() method. - But in theory, there could be bad shapes produced in the unbroadcasted dimensions. - -Interface Deprecation (a warning is printed): - * The mode ProfileMode is now deprecated, use the Theano flag profile=True to replace it. - * New theano.sparse_grad() interface to get the sparse grad of a_tensor[an_int_vector]. (Frederic B.) - This can speed up the sparse computations when a small fraction of a_tensor is taken. - Deprecate the old interface for this. (Frederic B.) - -Interface Changes: - * Interface change subtensor and take are not in tensor.basic anymore. They were available from tensor.* and are still available from there. (Frederic B., Matthew Rocklin) - * This lowers the basic.py size to 191k, so under 200k for github search. - * Add -m32 or -m64 in the module cache key and add the python bitwidth in the compiledir path. (Pascal L.) - * mrg.normal now has the parameter size mandatory. It was crashing with the default value of None. (Olivier D.) - * Remove the deprecated passing of multiple modes to theano function. (Frederic B.) - * Change FunctionGraph Features interface of the {on_prune(),on_import()} call back to take a reason. (Frederic B.) - * FunctionGraph now clone the input graph by default. (Frederic B.) - * Added a parameter to optionally not do this cloning. - * This was needed to speed up compilation - -New Interface (reuses existing functionality): - * Add hostname as a var in compiledir_format (Frederic B.) - * Add a new Theano flag: compute_test_value_opt. It takes the same values as compute_test_value. It enables compute_test_value during Theano optimization. Only useful to debug Theano optimization. Also small changes to some optimization to work correctly in that setup. (Frederic B.) - * Add the value pdb to the Theano flag: compute_test_value and compute_test_value_opt. (Frederic B.) - * Add the Theano flag: optimizer_verbose. Default False. When True, we print all the optimization being applied.(Frederic B.) - * Add Op.c_init_code() to allow running the code when the c cmodule is imported (Pascal L.) - * Allow theano.tensor.ones(3) to support scalar and not just list of scalar as numpy.ones (Jeremiah Lowin) - * Make the memory profiler print the FLOPS used for the ops that know how to compute it. (Frederic B.) - -New Features: - * Make tensor.{constant,as_tensor_variable} work with memmap. (Christian Hudon, Frederic Bastien) - * compilation work on ARM processor (Raspberry Pi, Vincent Dumoulin) - * Add numpy.random.choice wrapper to our random number generator (Sigurd Spieckermann) - * Better SymPy/Theano bridge: Make an Theano op from SymPy expression and use SymPy c code generator (Matthew Rocklin) - * Move in Theano the Conv3d2d implementation (James Bergstra, Frederic B., Pascal L.) 
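A brief illustrative sketch of how the ``compute_test_value`` machinery mentioned above is typically used (``compute_test_value_opt`` applies the same idea during graph optimization); it assumes nothing beyond the flags named here::

    import numpy as np
    import theano
    import theano.tensor as tt

    theano.config.compute_test_value = "raise"   # evaluate test values eagerly

    x = tt.matrix("x")
    x.tag.test_value = np.random.rand(3, 4).astype(theano.config.floatX)

    W = theano.shared(np.random.rand(4, 2).astype(theano.config.floatX), name="W")

    # Shape problems are reported here, at graph-construction time,
    # instead of later inside the compiled function.
    y = tt.dot(x, W)
    print(y.tag.test_value.shape)                # (3, 2)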
- * First version of the new GPU back-end available (Arnaud Bergeron, Frederic B.) - - * Not all Ops have been converted to this new back-end. - To use, use Theano flag device=cudaN or device=openclN, where N is a integer. - * Python 3.3 compatible (abalkin, Gabe Schwartz, Frederic B., Pascal L.) - * A new profiler (Frederic B.) - The new profiler now can profile the memory with the Theano flag profile_memory=True. - The ProfileMode now can't profile memory anymore and prints a message about it. - Now we raise an error if we try to profile when the gpu is enabled if we didn't set - correctly the env variable to force the driver to sync the kernel launch. - Otherwise the profile information are useless. - The new profiler supports the enabling/disabling of the garbage collection. - * Adds tensor.tri, tensor.triu, and tensor.tril functions that wrap Numpy equivalents (Jeremiah Lowin) - * Adds tensor.nonzero, tensor.flatnonzero functions that wrap Numpy equivalents (Jeremiah Lowin) - * Adds tensor.nonzero_values to get around lack of advanced indexing for nonzero elements (Jeremiah Lowin) - * Make {inc,set}_subtensor work on output of take. (Pascal L.) - * When device=cpu and force_device=True, force that we disable the gpu. (Frederic B.) - * Better Windows 64 bit support for indexing/reshaping (Pascal L.) - * Full advanced indexing support (John Salvatier, seberg) - * Add theano.tensor.stacklist(). Recursivly stack lists of tensors to maintain similar structure (Matthew R.) - * Add Theano flag value: on_opt_error=pdb (Olivier D.) - * GpuSoftmax[WithBias] for bigger row. (Frederic B.) - * Make Erfinv work on the GPU (Guillaume Desjardin, Pascal L.) - * Add "theano-cache basecompiledir purge" (Pascal L.) - This purges all the compiledirs that are in the base compiledir. - * A_tensor_variable.zeros_like() now supports the dtype parameter (Pascal L.) - * More stable reduce operations by default (Pascal L.) - Add an accumulator dtype to CAReduceDtype (acc_dtype) - by default, acc_dtype is float64 for float32 inputs, - then cast to specified output dtype (float32 for float32 inputs) - * Test default blas flag before using it (Pascal L.) - This makes it work correctly by default if no blas library is installed. - * Add cuda.unuse() to help tests that need to enable/disable the GPU (Frederic B.) - * Add theano.tensor.nnet.ultra_fast_sigmoid and the opt (disabled by default) local_ultra_fast_sigmoid. (Frederic B.) - * Add theano.tensor.nnet.hard_sigmoid and the opt (disabled by default) local_hard_sigmoid. (Frederic B.) - * Add class theano.compat.python2x.Counter() (Mehdi Mirza) - * Allow a_cuda_ndarray += another_cuda_ndarray for 6d tensor. (Frederic B.) - * Make the op ExtractDiag work on the GPU. (Frederic B.) - * New op theano.tensor.chi2sf (Ethan Buchman) - * Lift Flatten/Reshape toward input on unary elemwise. (Frederic B.) - This makes the "log(1-sigmoid) -> softplus" stability optimization being applied with a flatten/reshape in the middle. - * Make MonitorMode use the default optimizers config and allow it to change used optimizers (Frederic B.) - * Add support for ScalarOp.c_support_code in GpuElemwise. (Frederic B.) - * Also make the Psi function run on GPU. (Frederic B.) - * Make tensor.outer(x,y) work when ndim != 1 as numpy.outer. - * Kron op: Speed up/generalize/GPU friendly. (Frederic B.) - (It is not an op anymore, but reuses current op) - * Add gpu max for pattern (0, 1) and added all gpu max pattern for gpu min. (Frederic B.) - * Add GpuEye (Frederic B.) 
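Several of the items above rely on the functional ``{set,inc}_subtensor`` idiom; a minimal sketch, assuming only the standard ``theano.tensor`` API (both calls return a new variable instead of modifying ``x``)::

    import numpy as np
    import theano
    import theano.tensor as tt

    x = tt.dvector("x")
    idx = tt.ivector("idx")

    y = tt.set_subtensor(x[idx], 0.0)   # copy of x with x[idx] replaced by 0
    z = tt.inc_subtensor(x[idx], 1.0)   # copy of x with x[idx] incremented by 1

    f = theano.function([x, idx], [y, z])
    out_set, out_inc = f(np.arange(5.0), np.array([1, 3], dtype="int32"))
    print(out_set)   # [ 0.  0.  2.  0.  4.]
    print(out_inc)   # [ 0.  2.  2.  4.  4.]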
- * Make GpuCrossentropySoftmaxArgmax1HotWithBias and GpuCrossentropySoftmax1HotWithBiasDx work for bigger inputs (Frederic B., reported by Ryan Price) - * Finish and move out of sandbox theano.sparse.basic.true_dot (Nicolas Bouchard, Frederic B.) - And document all sparse dot variants. - * Implement the mode ignore_borders for GpuImages2Neibs (Frederic B.) - * Make many reduction functions accept a numpy scalar as axis (Jeremiah Lowin) - * Allow numpy.asarray(cuda_ndarray, dtype=...) (Frederic B.) - * theano-cache cleanup now remove cached module old version of code. (Frederic B.) - - -Speed-ups: - * Optimizer speed up. (Frederic B.) - * Fix warning on newer llvm version on Mac. (Pascal L., reported by Jeremiah Lowin and Chris Fonnesbeck) - * Allow pickling of more Ops to allow reusing the compiled code (Pascal L., Frederic B.) - * Optimize more cases of dot22 and scalar when we can't make a gemm (Pascal L., Frederic B.) - * Speed up GpuJoin with c code (Ludwig Schmidt-Hackenberg, Frederic B.) - * Faster GpuAdvancedIncSubtensor1 on Fermi GPU (and up) on matrix. (Vivek Kulkarni) - * Faster GPUAdvancedIncSubtensor1 in some cases on all GPU (Vivek Kulkarni) - * Implemented c_code for AdvancedSubtensor1 (abalkin) - * Add the equivalent of -march=native to g++ command line. (Frederic B., Pascal L.) - * Speed up compilation with Scan (Jan Schluter) - * Merge more Scan nodes together (Pascal L., Yao Li). - * Add MakeVector.c_code (Frederic B.) - * Add Shape.c_code (Frederic B.) - * Optimize Elemwise when all the inputs are fortran (Frederic B.) - We now generate a fortran output and use vectorisable code. - * Add ScalarOp.c_code_contiguous interface and do a default version. (Frederic B.) - This could optimize elemwise by helping the compiler generate SIMD instruction. - * Use ScalarOp.c_code_contiguous with amdlibm. (Frederic B.) - This speeds up exp, pow, sin, cos, log, log2, log10 and sigmoid when the input is contiguous in memory. - * A fix that removes a local_setsubtensor_of_allocs optimization warning and enables it in that case. (Frederic B., reported by John Salvatier) - * Make inv_as_solve optimization work (Matthew Rocklin) - -Crash/no return fixes: - * Fix scan crash in the grad of grad of a scan with special structure (including scan in a scan) (Razvan P., Bitton Tenessi) - * Fix various crashes when calling scan() with inputs specified in unusual ways. (Pascal L.) - * Fix shape crash inserted by Scan optimization. The gradient of some recursive scan was making the PushOutSeqScan optimization insert crash during the execution of a Theano function. (Frederic B., reported by Hugo Larochelle) - * Fix command not returning with recent mingw64 on Windows (Pascal L., reported by many people) - * Fix infinite loop related to Scan on the GPU. (Pascal L.) - * Fix infinite loop when the compiledir is full. (Frederic B.) - * Fix a shape cycle crash in the optimizer (Pascal L., Frederic B., reported by Cho KyungHyun) - * Fix MRG normal() now allow it to generate scalars. (Pascal L.) - * Fix some GPU compilation issue on Mac (John Yani, Frederic B.) - * Fix crash when building symbolic random variables with a mix of symbolic and numeric scalar in the "size" parameter. (Pascal L., Reported by Wu Zhen Zhou) - * Make some Op.grad() implementions not return None (Pascal L.) - * Crash fix in the grad of elemwise about an DisconnectedType (Pascal L, reported by Thomas Wiecki) - * Fix local_gpu_multinomial optimization handling of broadcast information. 
(Frederic B., reported by Caglar) - * Fix crash with change introduced in NumPy 1.7.1 (Pascal L., reported by Thomas Wiecki) - * Compilation failure with complex (Pascal L., reported by autumncat) - * Gpu reduction on all dimensions of a 4d tensor. (Frederic B., reported by Arjun Jain) - * Fix crash for a combination of grad of dot and dimshuffle when only one of the inputs for a corresponding dimensions was knowing that it was broadcastable. (Frederic B., reported by Micky Latowicki) - * AdvancedSubtensor1: allow broadcasted index vector. (Frederic B., reported by Jeremiah Lowin) - * Fix compute_test_value for ifelse (Olivier D., reported by Bitton Tenessi) - * Fix import error with some versions of NumPy (Olivier D.) - * Fix Scan grad exception (Razvan P., reported by Nicolas BL) - * Fix compute_test_value for a non_sequence when calling the gradient of Scan (Pascal L., reported by Bitton Tenessi). - * Crash fix in Scan following interface change in 0.6rc2 (Razvan P.) - * Crash fix on Scan (Razvan P.) - * Crash fix on Scan (Pascal L., reported by Sina Honari and Sigurd) - * Fix crash in Scan gradient related to compute_test_value (Frederic B., reported by Bitton Tenessi) - * Fix a scan optimization warning/error depending of Theano flags (Frederic B.) - * Fixed crash for unimplemented elemwise gradient (Olivier D., reported by Michael McNeil Forbes) - * Fix crash in the elemwise python code for some big shape with power of 2. (Sina Honari, Pascal L.) - * Fix compile and import errors on Windows including for the GPU. (Bogdan Budescu) - * Fix GPU compilation on Windows (XterNalz) - * Fix local_abs_merge optimization crash (Pascal L., reported by Jeremiah Lowin) - * Fix import theano crash when g++ isn't there (Olivier D.) - * Fix crash related to rebuild of Theano graph (Pascal L., reported by Divine Eguzouwa) - * Fix crash during compilation (David Ward-Farley) - * Crash fix in the grad of GPU op in corner case (Pascal L.) - * Crash fix on MacOS X (Robert Kern) - * theano.misc.gnumpy_utils.garray_to_cudandarray() set strides correctly for dimensions of 1. (Frederic B., reported by Justin Bayer) - * Fix crash during optimization with consecutive sums and some combination of axis (Frederic B., reported by Caglar Gulcehre) - * Fix crash with keepdims and negative axis (Frederic B., reported by David W.-F.) - * Fix crash of theano.[sparse.]dot(x,y) when x or y is a vector. (Frederic B., reported by Zsolt Bitvai) - * Fix opt crash/disabled with ifelse on the gpu (Frederic B, reported by Ryan Price) - * Fix crash in optimization involving dot22, (Pascal L., reported by @micklat) - * Prevent shape optimizations from introducing cycles in the graph (Frederic Bastien, Pascal Lamblin, reported by Kyunghyun Cho) - -Others: - * Update/Fixes/Typo/pep8 documentation and/or tutorial (Olivier D., David W.-F., Frederic B., Yaroslav Halchenko, Micky Latowicki, Ben McCann, Jason Yosinski, reported by Arnaud Bergeron) - * Doc how to make a sparse Op. (Frederic B.) - * Doc compatibility guide (abalkin) - * Fix problem in remove_constants_and_unused_inputs_scan. (useless warning and maybe slow down) (Pascal L.) - * Fix rop dot.(Razvan P., reported by Jeremiah Lowin) - * Raise better error related to pydot bug. (Frederic B., reported by Jason Yosinski and Ludwig Schmidt-Hackenberg) - * Fix to Theano tutorial examples. 
(reported by Ilya Dyachenko) - * Fix SharedVar.value property to make it raise an exception (Frederic B., reported by Drew Duncan) - * Fix verification with compute_test_value in grad() (Frederic B.) - * Theano flags are now evaluated lazily, only if requested (Frederic B.) - * Fix test when g++ is not avail (Frederic B.) - * Add manual instructions for OpenBLAS on Ubuntu by (Jianri Li ) - * Better/more error messages (Frederic B., Pascal L., Ian Goodfellow) - * Fix Error reporting with GpuConv (Frederic B., reported by Heng Luo and Nicolas Pinto) - * Now travis-ci tests with scipy the parts that need it (Frederic B.) - * Export some functions that work on CudaNdarray for windows (Frederic B.) - * If the user specifies a -arch=sm_* value in the Theano flags for the gpu, don't add one (Frederic B., Pascal L.) - * If a C thunk returns an error, check if a python exception is set. Otherwise, set a default one (Pascal L.) - * Crash fix introduced in the development version (Wei LI) - * Added BLAS benchmark result (Frederic B., Ben McCann) - * Fix code comment (Hannes Schulz) - * More stable tests (Frederic B.) - * Add utt.asset_allclose(a, b) to have better error message. (Frederic B.) - * Better error message with compute_test_value (Frederic, reported by John Salvatier) - * Stochastic order behavior fix (Frederic B.) - - * Simpler initial graph for subtensor infer shape (Olivier D.) - The optimization was doing the optimization, but this allows better reading of the graph before optimization. - * Better detection of non-aligned ndarray (Frederic B.) - * Update MRG multinomial gradient to the new interface (Mehdi Mirza) - * Implement Image2Neibs.perform() to help debug (Frederic B.) - * Remove some Theano flags from the compilation key (Frederic B.) - * Make theano-nose work on executable '\*.py' files. (Alistair Muldal) - * Make theano-nose work with older nose version (Frederic B.) - * Add extra debug info in verify_grad() (Frederic B.) - - -Theano 0.6rc3 (February 14th, 2013) -=================================== - -Highlights: - * Windows related fixes. - * Speed-ups. - * Crash fixes. - * A few small interface changes. - * GPU memory leak fix. - * A few corner cases fixes without incidence. - * More Theano determinism - * tensor.{dot,tensordot} more complete/faster/GPU friendly. - * tensor.tensordot now support Rop/Lop - * tensor.dot support n-dimensional inputs as NumPy - * To support more NumPy syntax: - * Add theano.tensor.take() - * Add a_tensor_variable.{sort,dot,std,argmin,argmax,argsort,clip,conj,conjugate,repeat,round,trace,real,imag,take} - -Commiters for this rc3 only: -Frederic Bastien -Ian Goodfellow -Pascal Lamblin -Jeremiah Lowin -abalkin -Olivier Delalleau -Razvan Pascanu -Rami Al-Rfou' -Vivek Kulkarni -Guillaume Desjardins -David Warde-Farley -Eric Hunsberger -Amir Elaguizy -James Bergstra - -Bug fix: - * Fix memory leak on the GPU in some corner cases with the Theano flags `allow_gc=False`. (Frederic B., reported by Jonas Gehring) - * Fix copy of random state between graph. (Guillaume D.) - http://deeplearning.net/software/theano/tutorial/examples.html#copying-random-state-between-theano-graphs - * Fix wrong dtype in sandbox.linalg.ExtractDiag with shape of 0. (Frederic B., reported by abalkin) - * Correctly support array with more then 2*10e32 element in AdvancedSubtensor1. (Abalkin) - * Fix wrong broadcast dimensions of output of Repeat op. (Abalkin) - We where using the inputs broadcasting pattern in some cases when we shouldn't. 
- * Fix theano.sandbox.linalg.eigh grad that didn't always returned the right dtype. (Frederic B., Olivier D.) - -New Features: - * More Theano determinism (Ian G., Olivier D., Pascal L.) - * Add and use a new class OrderedSet. - * theano.grad is now deterministic. - * Warn when the user uses a (non ordered) dictionary and this causes non-determinism in Theano. - * The Updates class was non-deterministic; replaced it with the OrderedUpdates class. - * tensor.tensordot now support Rop/Lop (Jeremiah Lowin) - This remove the class TensorDot and TensorDotGrad. It is the Dot/Elemwise ops that are used. - * tensor.dot support n-dimensional inputs as NumPy (Jeremiah Lowin) - Work on the GPU too. - * The Theano flag `nvcc.flags` now accept `-ftz=true`, `--prec-div=false` and `--prec=sqrt=false` as value. (Frederic B.) - To enable all of them, use the Theano flag `nvcc.flags=--use_fast_math`. - * New op theano.sparse.ConstructSparseFromList (Rami Al-Rfou' Vivek Kulkarni) - * Make Theano work with Anaconda on Windows. (Pascal L.) - * Add tensor_var.diagonal and theano.tensor.{diag,diagonal}. (abalkin) - * AdvencedSubtensor1 can now have a sparse gradient. (Rami Al-Rfou', Vivek Kulkarni) - * Implemented GpuContiguous.grad. (Ian G.) - -Interface Deprecation (a warning is printed): - * theano.misc.strutil.renderString -> render_string (Ian G.) - * Print a warning when using dictionary and this makes Theano non-deterministic. - -Interface Change: - * Raise an error when theano.shared called with a theano variable. (Frederic B.) - * Don't print warning for bug before Theano 0.5 by default. (Frederic B.) - * Theano functions now always have a field name, default to None. (Frederic B.) - * Theano function fct.fgraph have a copy of the Theano function name field. (Ian G.) - This is needed to allow the fgraph to know it. - * In the grad method, if it were asked to raise an error if there is no path between the variables, we didn't always returned an error. (Ian G.) - We returned the mathematical right answer 0 in those cases. - * get_constant_value() renamed get_scalar_constant_value() and raise a new exception tensor.basic.NotScalarConstantError. (Ian G.) - * theano.function raises an error when trying to replace inputs with the 'given' parameter. (Olivier D.) - This was doing nothing, the error message explains what the user probably wants to do. - -New Interface (reuse existing functionality): - * tensor_var.sort() as a shortcut for theano.tensor.sort. (Jeremiah Lowin) - We where already doing this for argsort. - * Add theano.tensor.take() and a_tensor_var.take() to support NumPy syntax. (abalkin) - * Add a_tensor_variable.{dot,std,argmin,argmax,argsort,clip,conj,conjugate,repeat,round,trace,real,imag}. (abalkin) - -New debug feature: - * DebugMode print more info when there is an error. (Frederic B.) - * Better profiling of test time with `theano-nose --time-profile`. (Frederic B.) - * Detection of infinite loop with global optimizer. (Pascal L.) - * DebugMode.check_preallocated_output now also work on Theano function output. (Pascal L.) - * DebugMode will now complain when the strides of CudaNdarray of dimensions of 1 are not 0. (Frederic B.) - -Speed-ups: - * c_code for SpecifyShape op. (Frederic B.) - * cross-entropy optimization now work when specify_shape is used. (Pascal L.) - * The Scan optimization ScanSaveMem and PushOutDot1 applied more frequently. (Razvan P, reported Abalkin) - A skipped optimization warning was printed. - * dot(vector, vector) now faster with some BLAS implementation. 
(Eric Hunsberger) - OpenBLAS and possibly others didn't call {s,d}dot internally when we called {s,d}gemv. - MKL was doing this. - * Compilation speed up: Take the compiledir lock only for op that generate c_code. (Frederic B) - * More scan optimization (Razvan P.) - * Opt to make RNN fast in Theano. - * Optimize some case of dot, by moving them outside of Scan. - * Move some sequences outside of scan too. - * Merge more scan inputs, mostly byproduct of other Scan optimizations. - * c_code for theano.sparse.AddSD. (Rami Al-Rfou', Vivek Kulkarni) - -Crash Fixes: - * Fix crash about dimshuffle. (abalkin) - * Fix crash at compilation. (Olivier D.) - * Fix openmp detection. (Pascal L.) - Resulted in a crash with EPD on Windows. - * Fix for new BLAS interface in SciPy. (Olivier D.) - Fix crash with some development version of SciPy. - * GpuSum work with bigger shape when summing on the first dim on 3d tensor. (Frederic B., reported Chris Currivan) - * Windows compilation crash fix. (Frederic B.) - * Make CrossentropySoftmax1HotWithBiasDx and CrossentropySoftmaxArgmax1HotWithBias support uint* dtype. (Frederic B., reported by Mark Fenner) - * Fix GpuSoftmax and GpuSoftmaxWithBias crash on GTX285. (Frederic B.) - * Fix crash due to a race condition when importing theano. (Ian G.) - * Fix crash from path problem with `theano-nose --batch`. (Abalkin) - * Fix crash with tensor.roll(Var, iscalar). (Frederic B., reported by Jeremiah Lowin) - * Fix compilation crash with llvm on Mac. (Abalkin) - * Fix the grad of Scan that told wrongly that there is no connection between cost and parameters. (Razvan P.) - * The infer shape mechanism now force that broadcasted dimensions have a shape know to be equivalent to one during compilation. - Sometimes, we where not able knowing this before run time and resulted in crash. (Frederic B.) - * Fix compilation problems on GPU on Windows. (Frederic B.) - * Fix copy on the GPU with big shape for 4d tensor (Pascal L.) - * GpuSubtensor didn't set the stride to 0 for dimensions of 1. This could lead to check failing later that caused a crash. (Frederic B., reported by vmichals) - -Theoretical bugfix (bug that won't happen with current Theano code, but if you messed with the internal, could have affected you): - * GpuContiguous, GpuAlloc, GpuDownSampleGrad, Conv2d now check the preallocated outputs strides before using it. (Pascal L.) - * GpuDownSample, GpuDownSampleGrad didn't work correctly with negative strides in their output due to problem with nvcc (Pascal L, reported by abalkin?) - -Others: - * Fix race condition when determining if g++ is available. (Abalkin) - * Documentation improvements. (Many people including David W-F, abalkin, Amir Elaguizy, Olivier D., Frederic B.) - * The current GPU back-end have a new function CudaNdarray_prep_output(CudaNdarray ** arr, int nd, const int * dims) (Ian G) - - -Theano 0.6rc2 (November 21th, 2012) -=================================== - -Highlights: - * Fix for a few regressions introduced in 0.6rc1. - * A few new features. - * Speed-ups. - * Scan fixes. - * Crash fixes. - * A few small interface changes. - -Commiters for this rc2 only: -Razvan Pascanu -Pascal Lamblin -Frederic Bastien -Ian Goodfellow -Jeremiah Lowin -Caglar Gulcehre -Jey Kottalam -Matthew Rocklin -abalkin - - -Regressions in 0.6rc1 fixed: - * Fixed the scan gradient dtype issue. In 0.6rc1, some upcast were inserted. (Razvan P.) - * Now grad() will do as before 0.6rc1 for float, i.e. the grad dtype will be the same as the inputs inside the graph. 
If you ask for the direct grad, it will return the computed dtype. (Pascal L.) - -Wrong results fixes: - * Scan fix in some case didn't returned the good results. (Razvan P., reported by Jeremiah L.) - This happened if you had a state with only neg tap and the output of the state was a function of some sequence. - If you had multiple states, there was no problem. - * Fixed bug in Scan with multiple outputs, - where one output would sometimes overwrite another one. (Razvan P.) - * Clip.grad treated the gradient with respect to the clipping boundary as always 0. (Ian G.) - -Interface changes: - * We do not support anymore unaligned ndarray in Python code. (Frederic B.) - We did not support it in C code and supporting it in Python code made - the detection harder. - * Now we only officially support SciPy 0.7.2 and NumPy 1.5.0 (Frederic B.) - We weren't and aren't testing with older versions. - * The theano.sparse.SparseType is available even when SciPy is not (Frederic B.) - * Fixed issue where members of consider_constant grad parameter - were treated differently from Constant variables. (Ian G.) - * Removed the parameter g_cost from theano.grad(). (Ian G.) - Use the new more powerful parameter known_grads instead. - -NumPy interface support: - * theano.tensor.where is an alias for theano.tensor.switch to support NumPy semantic. (Ian G.) - * TensorVariable objects now have dot, argmin, argmax, clip, conj, repeat, trace, std, round, - ravel and argsort functions and the real and imag properties as numpy.ndarray objects. - The functionality was already available in Theano. (abalkin) - -Speed-ups: - * A C version of the SoftMax op (Razvan P.) - There was C code for the softmax with bias code. - * Faster GpuIncSubtensor (Ian G.) - * Faster copy on the GPU for 4d tensor. (Ian G.) - * The fix of flatten infer_shape re-enables an optimization (Pascal L.) - * The bug was introduced in 0.6rc1. - * Enable inc_subtensor on the GPU when updating it with a float64 dtype. (Ian G.) - It was causing an optimization warning. - * Make DeepCopy reuse preallocated memory. (Frederic B.) - * Move the convolution to the GPU when the image shape and logical image shape differ. (Frederic Bastien) - * C code for the View Op (Razvan P., Pascal L.) - -New Features: - * Added a monitoring mode "MonitorMode" as a debugging tool. (Olivier D.) - * Allow integer axes when keepdims==True (Jeremiah Lowin) - * Added erfinv and erfcinv op. (Jey Kottalam) - * Added tensor.batched_dot(). (Caglar Gulcehre) - It uses scan behind the scenes, but makes doing this easier. - * theano.get_constant_value(x) (Frederic B.) - This tries to have x as a constant int. - This does some constant folding to try to convert x into an int. - Used by some optimizations. - * Add theano.tensor.io.{MPIRecv,MPIRecvWait,MPISend,MPISendWait} (Matthew Rocklin) - Theano does not automatically use them. It is up to you to use them and split your computations. - * Added theano.sandbox.linalg.eig (abalkin) - * Started some support for Python3 (abalkin) - setup.py supports python3 now. - It calls 2to3 during the setup. - Python3 is not fully supported as we didn't update the C code. - - -Crash Fixes: - * Fix a crash related to scan.grad due to the new mechanism. (Ian G.) - * Fix an optimization warning. Now it gets optimized. (Frederic B.) - * Fix crash introduced in 0.6rc1 in theano.grad (Ian G.) - * Fix crash introduced in 0.6rc1 in the grad of scan (Razvan P.) - * Fix crash introduced in 0.6rc1 in the grad of clip (Ian G.) 
- Also implement the gradient on the min/max bound. - * Fix crash in the grad of tensor.switch for int (Ian G.) - * Fix crash when mixing shared variable on the GPU and sparse dot. (Pascal L.) - * Fix crash as sometimes sparse.dot would return a different dtype number - that is equivalent but not the one expected. (Pascal L., reported by Rami Al-Rfou) - * Better error msg (Ian G.) - * Move all sparse random functions back to sandbox as they don't have a state inside Theano. (Pascal L.) - They were moved outside the sandbox in 0.6rc1 - * LoadFromDisk now is allowed to only support some memmap mode. (Pascal L.) - Otherwise, this was causing errors, segmentation faults or wrong results. - * Fix import problem on PiCloud (Jeremiah Lowin) - * You need to use the c|py linker with the default - environment. Otherwise, you need to create your own environment. - * Fix a crash during optimization when we take a subtensor of a constant with a non constant index. (Ian G.) - * Better handling and error message of gradients on integer. (Ian G.) - * Fixed a crash where Scan assumed all TypeErrors raised by the grad function were due to undefined gradients (Ian G.) - -Other: - * Doc typo fixes, Doc updates, Better error messages: Olivier D., David W.F., Frederic B., James B., Matthew Rocklin, Ian G., abalkin. - - -Theano 0.6rc1 (October 1st, 2012) -================================= - -Highlights: - * Bug fixes, crash fixes, CPU and GPU speed up. - * theano_var.eval({other_var: val[,...]} to simplify the usage of Theano (Ian G.) - * New default linker `cvm`. This is the execution engine that tells ops to run in certain orders. - It is now implemented in C and enables lazy evaluation of ifelse op. - * Faster theano.function compilation. (Pascal L., Ian G.) - * Big sparse submodule update and documentation of it. (Nicolas Bouchard) - * Use GPU asynchronous functionality (Frederic B.) - * Better Windows support. - -Known bugs: - * A few crash cases that will be fixed by the final release. - -Bug fixes: - * Outputs of Scan nodes could contain corrupted values: some parts of the - output would be repeated a second time, instead of the correct values. - It happened randomly, and quite infrequently, but the bug has been present - (both in Python and Cython) since April 2011. (Pascal L.) - * In Sparse sandbox, fix the grad of theano.sparse.sandbox.sp.row_scale. - It did not return the right number of elements. (Frederic B.) - * set_subtensor(x[int vector], new_value) when moved to the GPU - was transformed into inc_subtensor on the GPU. Now we have a correct - (but slow) GPU implementation. - Note 1: set_subtensor(x[slice[,...]], new_value) was working correctly - in all cases as well as all inc_subtensor. - Note 2: If your code was affected by the incorrect behavior, we now print - a warning by default (Frederic B.) - * Fixed an issue whereby config values were used as default arguments, - with those defaults then stuck at old values if the config variables were - changed during program execution. (David W-F) - * Fixed many subtle bugs involving mutable default arguments which may have - led to unexpected behavior, such as objects sharing instance variables - they were not supposed to share. (David W-F) - * Correctly record the GPU device number used when we let the driver select it. - (Frederic B.) - * Min, max with NaN in inputs did not return the right output. (Pascal L.) - * The grad of TensorDot, was returning the wrong shape for some combination of axes. - We now raise NotImplementedError in those cases. 
(Frederic B.) - * conv2d with subsample >2 returned wrong values. (Pascal L.) - * Fixed when mode==valid, disabled when mode==full - * theano.sparse.CSMGrad op (generated by the grad of CSM) didn't - handle unsorted input correctly and gradient that is sparser - than the input. In that case, a bad result was returned. But this could - happen only when a sparse input of a Theano function was not - sorted. This happens for example with sparse advanced indexing from - scipy. The conclusion is most of time Nan in the graph. - (Yann Dauphin) - * theano.sparse._dot(CSC matrix, dense) optimized version UsmmCSCDense didn't handle - correctly not contiguous inputs/outputs. (Pascal L.) - * Fix a corner case CVM updates case. (Pascal L.) - This happened if the update to a shared variable is itself after optimization. - The CVM was not used by default. - * Fix the view_map of sparse.Transpose and sparse.sandbow.sp.RowScale. (Frederic B.) - This probably didn't cause problem as there is only the UsmmCscDense op - (used call to Usmm with CSC matrix) that could interfere with them. - -Deprecation: - * Deprecated the Module class (Ian G.) - This was a predecessor of SharedVariable with a less pythonic philosophy. - -Interface changes: - * Now the base version requirements are numpy >= 1.5.0 and the optional scipy >= 0.7.2. - * In Theano 0.5, we removed the deprecated sharedvar.value property. - Now we raise an error if you access it. (Frederic B.) - * theano.function does not accept duplicate inputs, so function([x, x], ...) - does not work anymore. (Pascal L.) - * theano.function now raises an error if some of the provided inputs are - not part of the computational graph needed to compute the output, for - instance, function([x, y], [y]). You can use the kwarg - ``on_unused_input={'raise', 'warn', 'ignore'}`` to control this. - (Pascal L.) - * New Theano flag "on_unused_input" that defines the default value of the - previous point. (Frederic B.) - * tensor.alloc() now raises an error during graph build time - when we try to create less dimensions than the number of dimensions - the provided value have. In the past, the error was at run time. - (Frederic B.) - * Remove theano.Value and related stuff (Ian G.) - This was a test of what ended up as SharedVariable. - * Renamed Env to FunctionGraph, and object attribute "env" to "fgraph" (Ian G.) - Deprecation warning printed when you try to access the "env" attribute. - * Renamed the FunctionGraph.nodes attribute to FunctionNodes.apply_nodes (Ian G.) - * Warn when we don't handle correctly the parameter in Theano flags `nvcc.flags` - (Frederic B.) - * Do not reorder the user flags passed to the compiler. They get set after other flags. (Frederic B.) - * Make setuptools optional (Ilan Schnell) - * We warn when a user tries to use an old GPU with which Theano is untested. - This could cause crash and will also be very slow. (Frederic B.) - * Make theano.grad able to differentiate between not implemented, undefined and disconnected grad. - Op.grad function should return theano.gradient.{grad_not_implemented,grad_undefined} or - something of DisconectedType (Ian G.) - * Make theano.grad expect to always receive a float or undefined - gradient and enforce that op with integer output values always - return 0. (Ian G.) - - -New memory output contract (was mentioned in the release notes of Theano 0.5): - * Now the output memory received can be preallocated by other stuff. - In the past it was always the previous output an Apply node allocated. 
- So this means that the shape and strides can be different from previous calls - and there can be links to this memory at other places. - This means it could receive preallocated output that is not c_contiguous. - But we don't do that now. (Pascal L.) - * New Theano flags to test this DebugMode.check_preallocated_output (Pascal L.) - * Updated a few ops to respect this contract (Pascal L.) - - -New Features: - * GPU scan now works (does not crash) when there is a mixture of float32 and other dtypes. - * theano_var.eval({other_var:val[,...]} to simplify the usage of Theano (Ian G.) - * debugprint new param ids=["CHAR", "id", "int", ""] - This makes the identifier printed to be a unique char, the Python id, a - unique int, or not have it printed. We changed the default to be "CHAR" - as this is more readable. (Frederic B.) - * debugprint new param stop_on_name=[False, True]. If True, we don't print - anything below an intermediate variable that has a name. Defaults to False. - (Frederic B.) - * debugprint does not print anymore the "|" symbol in a column after the last input. (Frederic B.) - * If you use Enthought Python Distribution (EPD) now we use its blas - implementation by default. (Frederic B., Graham Taylor, Simon McGregor) - * MRG random now raises an error with a clear message when the passed shape - contains dimensions with bad value like 0. (Frederic B. reported by Ian G.) - * "CudaNdarray[*] = ndarray" works in more cases (Frederic B.) - * "CudaNdarray[*] += ndarray" works in more cases (Frederic B.) - * We add dimensions to CudaNdarray to automatically broadcast more frequently. - (Frederic B.) - * New theano flag cmodule.warn_no_version. Default False. If True, - will print a warning when compiling one or more Op with C code that - can't be cached because there is no c_code_cache_version() function - associated to at least one of those Ops. (Frederic B.) - * CPU alloc now always generate C code (Pascal L.) - * New Theano flag cmodule.warn_no_version=False. When True, warn when an op - with C code is not versioned (which forces to recompile it everytimes). - (Frederic B.) - * C code reuses preallocated outputs (only done by Scan) (Pascal L.) - * Garbage collection of intermediate results during Theano function calls - for Ops with C code (Pascal L.) - * Theano flag compiledir_format now supports the parameter "numpy_version" and "g++". (Frederic B.) - * Theano GPU variables, shared variables and constants now support <, <=, - > and >= similar to those not on the GPU. - * AdvancedIncSubtensor now supports the set_instead_of_inc parameter. (Eric L.) - * Added Advanced Indexing support to inc_subtensor and set_subtensor. (Eric L.) - * theano.tensor.{any,all,std,var,mean,prod,sum,argmin,argmax,min,max,max_and_argman} - have a new parameter keepdims (Eric L.) - This allows to broadcast it correctly against the input data to normalize it. - * The Updates objects now check that the keys are SharedVariable when we pass them - in the __init__ function. (Pascal L.) - * Set a Theano Variable name on transposed op when the input has one (Frederic B). - * The cvm linker now supports garbage collection (enabled by default). (James B. Arnaud B., Pascal L.) - * The cvm linker is now the default linker. - This makes the "loop" around the execution of apply node in C. So this lowers the overhead. - * theano_variable[numpy.newaxis] is now supported (James B.) - * Enable ifelse on the GPU. (Frederic B.) - * Correctly support numpy.memmap everywhere (Pascal L.) 
- We add partial support for them before. Just use the normal tensor operation - on them and it should work. - But be careful not to exhaust your computer memory! (we always generate normal ndarray) - * Add an optimization that stabilizes log(softmax(x)). (Ian G.) - * Re-enable the Images2Neibs grad. It was not broken, the problem was how we tested it. (Frederic B.) - * If `theano_fn.trust_input` is set to False, do not check if the inputs are good - when calling the theano function. (Frederic B.) - * Add theano.tensor.blas,gem{m,v} as shortcut. - * theano.grad(..., add_names=True). False for the old - behavior. Otherwise it tries to name the grad variables. (Ian G.) - * theano-nose (Pascal L.) - A wrapper around nosetests that adds needed extensions. - * --profile-time option, to print time spent in each test (Eric L.) - * --batch option, to allow to run tests in batch to lower memory requirement. - * m = mean(log(1 - sigm(x))) - x - scalar * theano.grad(m, x) - There is a stabilization optimization for this. - Now it is applied more frequently. (Pascal L.) - - -New Op/functions: - * Added element-wise operation theano.tensor.{GammaLn,Psi} (John Salvatier, Nicolas Bouchard) - * Added element-wise operation theano.tensor.{arcsin,arctan,arccosh,arcsinh,arctanh,exp2,arctan2} (Nicolas Bouchard) - * Added element-wise operation theano.tensor.{gamma,conj,complex_from_polar,expm1,deg2rad,rad2deg,trunc,gamma} (Nicolas Bouchard) - * Added theano.tensor.argsort that wraps numpy.argsort (Hani Almousli). - * Added theano.tensor.diff that wraps numpy.diff (Nicolas B.) - * Added theano.tensor.bincount that wraps numpy.bincount (Nicolas B., Pascal L, Frederic B.) - * Added theano.tensor.squeeze (Nicolas B.) - This removes broadcasted dimensions from the variable. - Theano-esque version of numpy.squeeze. - * Added theano.tensor.repeat that wraps numpy.repeat (Nicolas B. + PL) - * Added theano.tensor.bartlett that wraps numpy.bartlett (Eric L.) - * Added theano.tensor.fill_diagonal that wraps numpy.fill_diagonal (Eric L., Frederic B.) - * Added tensor.square that is an alias for tensor.sqr as NumPy (Ian G.) - * Added theano.tensor.load(path, dtype, broadcastable, mmap_mode=None) op - that allows to load a .npy file in a theano graph (Matthew Rocklin) - * theano.sandbox.linalg.kron.py:Kron op. (Eric L.) - Kronecker product - -Speed up: - * CPU convolutions are now parallelized (Frederic B.) - By default use all cores/hyper-threads. - To control it, use the `OMP_NUM_THREADS=N` environment variable where N is the number of - parallel threads to use. By default it is equal to the number of CPU cores/hyper - threads that you have. - There is a new Theano flag `openmp` to allow/disallow openmp op. - If your BLAS library is parallelized, this flag won't affect it, but the - env variable will. - * Remove a corner case causing duplicated dot22/gemm in the graph. (Frederic B., Ian G.) - * Enable fusion of elemwise that have the same clients multiple times. (Frederic B.) - * New optimization: Remove reduction over broadcastable dimensions (James B., Frederic B.) - * Faster theano.function compilation. (Pascal L., Ian G.) - * Remove GPU transfer around specify_shape op. (Frederic B.) - * Implemented/tested MANY op.infer_shape method (Eric Larsen) - This allows Theano to make better shape inferance. - * Implement Solve.infer_shape (Matthew Rocklin) - * Scan memory optimizations now work more frequently. (Razvan P.) - There was a warning printed by the subtensor optimization in those cases. 
- * Faster rng_mrg Python code. (mostly used for tests) (Frederic B.) - -Speed up GPU: - * Convolution on the GPU now checks the generation of the card to make - it faster in some cases (especially medium/big ouput image) (Frederic B.) - - * We had hardcoded 512 as the maximum number of threads per block. Newer cards - support up to 1024 threads per block. - * Faster GpuAdvancedSubtensor1, GpuSubtensor, GpuAlloc (Frederic B.) - * We now pass the GPU architecture to nvcc when compiling (Frederic B.) - * Now we use the GPU function async feature by default. (Frederic B.) - Set the environment variable `CUDA_LAUNCH_BLOCKING` to `1` to disable this - for profiling or debugging. - * Faster creation of CudaNdarray objects (Frederic B.) - * Now some Max reductions are implemented on the GPU. (Ian G.) - -Sparse Sandbox graduate (moved from theano.sparse.sandbox.sp): - * sparse.remove0 (Frederic B., Nicolas B.) - * sparse.sp_sum(a, axis=None) (Nicolas B.) - * bugfix: the not structured grad was returning a structured grad. - * sparse.{col_scale,row_scale,ensure_sorted_indices,clean} (Nicolas B.) - * sparse.{diag,square_diagonal} (Nicolas B.) - -Sparse: - * Support for uint* dtype. - * Implement theano.sparse.mul(sparse1, sparse2) when both inputs don't - have the same sparsity pattern. (Frederic B.) - * New Ops: sparse.{expm1,deg2rad,rad2deg,trunc} (Nicolas B.) - * New Ops: sparse.{sqrt,sqr,log1p,floor,ceil,sgn,round_half_to_even} (Nicolas B.) - * New Ops: sparse.{arctanh,tanh,arcsinh,sinh,arctan,arcsin,tan,sin} (Nicolas B.) - * New functions: structured_{add,exp,log,pow,minimum,maximum,sigmoid} (Yann D., Nicolas B.) - * Optimized op: StructuredAddSV, StrucutedAddSVCSR (inserted automatically) - * New Op: sparse.mul_s_v multiplication of sparse matrix by broadcasted vector (Yann D.) - * New Op: sparse.Cast() (Yann D., Nicolas B.) - * Add sparse_variable.astype() and theano.sparse.cast() and - theano.sparse.{b,w,i,l,f,d,c,z}cast() as their tensor equivalent (Nicolas B.) - * Op class: SamplingDot (Yann D., Nicolas B.) - * Optimized version: SamplingDotCsr, StructuredDotCSC - * Optimizations to insert the optimized version: local_sampling_dot_csr, local_structured_add_s_v - * New Ops: sparse.{Multinomial,Poisson,Binomial} (Yann D., NB) - * Implement the CSMProperties grad method (Yann Dauphin) - * Move optimizations to theano/sparse/opt.py (Nicolas B.) - -New flags: - * `profile=True` flag now prints the sum of all printed profiles. (Frederic B.) - * It works with the linkers vm/cvm (default). - * Also print compile time, optimizer time and linker time. - * Also print a summary by op class. - * new flag "profile_optimizer" (Frederic B.) - when profile=True, will also print the time spent in each optimizer. - Useful to find optimization bottleneck. - * new flag "cmodule.remove_gxx_opt" (Frederic B.) - If True, will remove -O* parameter passed to g++. - This is useful to debug in gdb module compiled by Theano. - The parameter -g is passed by default to g++. - * new flag cmodule.compilation_warning - if True, will print compilation warning. - * new flag `allow_gc` (Frederic B.) - When False, do not garbage collect intermediate results when they are not needed. - This uses more memory, but allocates memory less frequently so faster. - * new flag `vm.lazy` (Frederic B.) - Useful only for the vm linkers. When lazy is None, - auto detect if lazy evaluation is needed and use the apropriate - version. If lazy is True/False, force the version used between - Loop/LoopGC and Stack. - * new flag `cxx`. 
This is the C++ compiler to use. If empty do not compile C code. (Frederic B.) - * New flag `print_active_device` that defaults to True. (Matthew R.) - -Documentation: - * Added in the tutorial documentation on how to extend Theano. - This explains how to make a Theano Op from a Python function. - http://deeplearning.net/software/theano/tutorial/extending_theano.html - (Frederic B.) - * New installation instructions for Windows using EPD (Pascal L.) - * New installation on Windows by using a Linux VM from ContinuumIO (Frederic B.) - * Revisions of Theano tutorial and addition of exercises to it. (Eric L.) - * New tutorial on Sparse variable. (Nicolas B., Sebastien Lemieux, Frederic Bastien - http://www.deeplearning.net/software/theano/tutorial/sparse.html - * Installation documentation for CentOS6 (Frederic B.) - * Installation documentation for Ubuntu (with GPU) (Frederic B., Matthias Zoehrer) - * Doc typo fixes, Doc updates, Better error messages: Olivier D., David W.F., Frederic B., James B., Matthew Rocklin, Ian G. - * Python Memory Management tutorial (Steven Pigeon, Olivier D.) - -Proposal: - * Math framework for complex gradients (Pascal L.) - - -Internal changes: - * Define new exceptions MissingInputError and UnusedInputError, and use them - in theano.function, instead of TypeError and ValueError. (Pascal L.) - * Better handling of bitwidth and max values of integers and pointers - across platforms (Pascal L.) - * Made a few Ops with C code versioned to reduce compilation time. - (Frederic B, Pascal L.) - * Better deletion of files in the compiledir (Frederic B.) - * Safer import on sort op (Nicolas Pinto) - * hash_from_dict for elemwise op (Fredric B.) - * Renamed BadCLinkerOutput into BadThunkOutput. (PL) - * tensor.utils.shape_of_variables (Matthew R.) - * Add the numpy abi version and g++/nvcc version in the key of compiled code. (Frederic B.) - * env.replace_all_validate_remove (Frederic B.) - This allows global optimizer to ensure it removed some nodes from the graph. - This is a generic way to catch errors that would otherwise duplicate - computation. - * It was used for GEMM and Scan optimization (Frederic B., Razvan P.) - * Fix how exception are raised in GPU code (James B.) - * Made code respect pep8: OD, Fred, Pascal L., Nicolas Bouchard, Eric Larsen and others. - * TensorType and CudaNdarrayType now have a value_zeros method that call CudaNdarray.zeros or - numpy.zeros with the right dtype. (Pascal L., Olivier D.) - This allows to have the same code work with both types. - * Renamed FunctionGraph.extend function to FunctionGraph.attach_feature. (Ian G.) - * New exception MissingGXX when we try to compile but there is no cxx compiler. (Frederic B.) - * New fct theano.gof.utils.give_variables_names(...) that gives unique names to variables. (Matthew R.) - * Use most of the time the new NumPy C-API for later NumPy release. (Frederic B.) - * New theano.gof.sched.sort_apply_nodes() that will allow other execution ordering. (Matthew R.) - * New attribute sort_schedule_fn, a way to specify a scheduler to use. (Matthew R.) - -Crash Fix: - * Fix import conflict name (usaar33, Frederic B.) - * This makes Theano work with PiCloud. - * Do not try to use the BLAS library when blas.ldflags is manually set to an - empty string (Frederic B., Pascal L.) - * When importing theano on a computer without GPU with the Theano - flags 'device' or 'init_gpu_device' set to gpu* (Frederic B., reported by Luo Heng) - * Optimization printed a useless error when scipy was not available. 
(Frederic B.) - * GPU conv crash/slowdown on newer hardware (James B.) - * Better error handling in GPU conv (Frederic B.) - * GPU optimization that moves element-wise Ops to the GPU. Crash happened in - a particular execution order of this optimization and the - element-wise fusion optimization when upcasting some inputs to - float32 (to compute them on the GPU). - (Frederic B., reported by Sander Dieleman) - * GpuReshape in some particular case when the input is not contiguous - (Frederic B., reported by Sander Dieleman) - * GpuSoftmaxWithBias with shape (0, N) with N > 1. - (Frederic B., reported by Razvan P.) - * Fix crash under 64-bit Windows, when taking subtensors of the form a[n:] - (Pascal L., reported by Simon McGregor) - * Fixed issue with the MaxAndArgmax Op not properly preserving broadcastable - dimensions, which could typically result in optimization crashes (Olivier D.) - * Fixed crash when concatenating some arrays with specific broadcasting - patterns (Olivier D.) - * Work around a known issue with nvcc 4.1 on MacOS X. (Graham Taylor) - * In advanced indexing, if some inputs are constant, no need to call constant(...) - on their value any more. (Pascal L., reported by John Salvatier) - * Fix crash on GPU when the GpuSubtensor didn't put the right stride - when the result tensor had a dimension with size of 1. (Pascal L, - reported Graham T.) - * Fix scan crash that made it not run on the GPU in one case. (Guillaume D.) - * If you grad again a random state, don't crash (Razvan P.) - * GpuDownsampleFactorMax and its grad with inputs dimensions 0 and 1 bigger then 65535. - (Frederic B. reported by Gabe Schwartz) - * Potential crash due to parallel compilation when importing theano.sandbox.cuda - (Olivier D.) - * Crash fix on python 2.4 with slicing. (Pascal L.) - * grad of argmin and argmax (Razvan P.) - * Don't compute the Rop for shared variables with updates (mostly random). - We don't use them and they caused crash. (Razvan P.) - * MaxArgmax.grad() when one of the gradient it receives is None. (Razvan P, reported by Mark Fenner) - * Fix crash of GpuSum when some dimensions shape was 0. (Frederic B.) - -Tests: - * Use less memory (Olivier D.) (fix crash on 32-bit computers) - * Fix test with Theano flag "blas.ldflags=". (Frederic B., Pascal L.) - * Fix crash with advanced subtensor and numpy constant. - * Fix random tests crash due to random value. (Pascal L.) - * Always introduce Alloc node when calling alloc and let the optimizer remove them if needed. - This allows DebugMode to catch some shape error. (Pascal L.) - * DebugMode now checks the view_map for all types of Theano variables. - It was doing only variables of tensor type. (Frederic B.) - -Others: - * Remove python warning for some python version. (Gabe Schwartz) - * Remove useless fill op in fast_compile mode to make the graph more readable. (Fredric B.) - * Remove GpuOuter as it is a subset of the new GpuGer (Frederic B.) - * Now we use http://travis-ci.org/ to run all CPU tests (without SciPy) - with the default mode on all Pull Requests. - This should make the trunk more stable. (Fredric B.) - * Our nightly buildbot now checks on python 2.4 (Frederic B.) - This should make the trunk work on it more frequently. - -Other thanks: - * blaxill reported an error introduced into the trunk. - -New stuff that will probably be reworked/removed before the release: - * Better PyCUDA sharing of the GPU context.(fix crash at exit) (Frederic B.) - TODO: there is still a crash at exit! 
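- As an editor's aside (not part of the original notes), a minimal sketch of two of the
- 0.6rc1 additions listed above, the `variable.eval()` shortcut and the new `keepdims`
- parameter of the reductions; the variable names here are ours:
-
-     import numpy
-     import theano
-     import theano.tensor as T
-
-     x = T.matrix('x')
-     data = numpy.arange(6, dtype=theano.config.floatX).reshape(3, 2)
-
-     # eval() compiles a throw-away function behind the scenes
-     print((x ** 2).eval({x: data}))
-
-     # keepdims keeps the reduced axis, so the result broadcasts against x
-     row_sums = T.sum(x, axis=1, keepdims=True)
-     normalized = x / row_sums
-     print(normalized.eval({x: data}))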
- - -Theano 0.5 (23 February 2012) -============================= - -Highlights: - * Moved to github: http://github.com/Theano/Theano/ - * Old trac tickets moved to assembla tickets: http://www.assembla.com/spaces/theano/tickets - * Theano vision: http://deeplearning.net/software/theano/introduction.html#theano-vision (Many people) - * Theano with GPU works in some cases on Windows now. Still experimental. (Sebastian Urban) - * Faster dot() call: New/Better direct call to cpu and gpu ger, gemv, gemm - and dot(vector, vector). (James, Frédéric, Pascal) - * C implementation of Alloc. (James, Pascal) - * theano.grad() now also works with sparse variables. (Arnaud) - * Macro to implement the Jacobian/Hessian with theano.tensor.{jacobian,hessian} (Razvan) - * See the Interface changes. - - -Interface Behavior Changes: - * The current default value of the parameter axis of - theano.{max,min,argmax,argmin,max_and_argmax} is now the same as - numpy: None. i.e. operate on all dimensions of the tensor. - (Frédéric Bastien, Olivier Delalleau) (was deprecated and generated - a warning since Theano 0.3 released Nov. 23rd, 2010) - * The current output dtype of sum with input dtype [u]int* is now always [u]int64. - You can specify the output dtype with a new dtype parameter to sum. - The output dtype is the one used for the summation. - There is no warning in previous Theano versions about this. - The consequence is that the sum is done in a dtype with more precision than before. - So the sum could be slower, but will be more resistant to overflow. - This new behavior is the same as numpy. (Olivier, Pascal) - * When using a GPU, detect faulty nvidia drivers. This was detected - when running Theano tests. Now this is always tested. Faulty - drivers result in wrong results for reduce operations. (Frederic B.) - - -Interface Features Removed (most were deprecated): - * The string modes FAST_RUN_NOGC and STABILIZE are not accepted. They - were accepted only by theano.function(). - Use Mode(linker='c|py_nogc') or Mode(optimizer='stabilize') instead. - * tensor.grad(cost, wrt) now always returns an object of the "same type" as wrt - (list/tuple/TensorVariable). (Ian Goodfellow, Olivier) - * A few tag.shape and Join.vec_length left have been removed. (Frederic) - * The .value attribute of shared variables is removed, use shared.set_value() - or shared.get_value() instead. (Frederic) - * Theano config option "home" is not used anymore as it was redundant with "base_compiledir". - If you use it, Theano will now raise an error. (Olivier D.) - * scan interface changes: (Razvan Pascanu) - * The use of `return_steps` for specifying how many entries of the output - to return has been removed. Instead, apply a subtensor to the output - returned by scan to select a certain slice. - * The inner function (that scan receives) should return its outputs and - updates following this order: - [outputs], [updates], [condition]. - One can skip any of the three if not used, but the order has to stay unchanged. - -Interface bug fix: - * Rop in some case should have returned a list of one Theano variable, - but returned the variable itself. (Razvan) - -New deprecation (will be removed in Theano 0.6, warning generated if you use them): - * tensor.shared() renamed to tensor._shared(). You probably want to - call theano.shared() instead! (Olivier D.) - - -Bug fixes (incorrect results): - * On CPU, if the convolution had received explicit shape information, - they were not checked at runtime. 
This caused wrong result if the - input shape was not the one expected. (Frederic, reported by Sander - Dieleman) - * Theoretical bug: in some case we could have GPUSum return bad value. - We were not able to reproduce this problem - * patterns affected ({0,1}*nb dim, 0 no reduction on this dim, 1 reduction on this dim): - 01, 011, 0111, 010, 10, 001, 0011, 0101 (Frederic) - * div by zero in verify_grad. This hid a bug in the grad of Images2Neibs. (James) - * theano.sandbox.neighbors.Images2Neibs grad was returning a wrong value. - The grad is now disabled and returns an error. (Frederic) - * An expression of the form "1 / (exp(x) +- constant)" was systematically matched to "1 / (exp(x) + 1)" - and turned into a sigmoid regardless of the value of the constant. A warning will be issued if your - code was affected by this bug. (Olivier, reported by Sander Dieleman) - * When indexing into a subtensor of negative stride (for instance, x[a:b:-1][c]), - an optimization replacing it with a direct indexing (x[d]) used an incorrect formula, - leading to incorrect results. (Pascal, reported by Razvan) - * The tile() function is now stricter in what it accepts to allow for better - error-checking/avoiding nonsensical situations. The gradient has been - disabled for the time being as it only implemented (incorrectly) one special - case. The `reps` argument must be a constant (not a tensor variable), and - must have the same length as the number of dimensions in the `x` argument; - this is now checked. (David) - - -Scan fixes: - * computing grad of a function of grad of scan (reported by Justin Bayer, fix by Razvan) - before: most of the time crash, but could be wrong value with bad number of dimensions (so a visible bug) - now: do the right thing. - * gradient with respect to outputs using multiple taps (reported by Timothy, fix by Razvan) - before: it used to return wrong values - now: do the right thing. - Note: The reported case of this bug was happening in conjunction with the - save optimization of scan that give run time errors. So if you didn't - manually disable the same memory optimization (number in the list4), - you are fine if you didn't manually request multiple taps. - * Rop of gradient of scan (reported by Timothy and Justin Bayer, fix by Razvan) - before: compilation error when computing R-op - now: do the right thing. - * save memory optimization of scan (reported by Timothy and Nicolas BL, fix by Razvan) - before: for certain corner cases used to result in a runtime shape error - now: do the right thing. - * Scan grad when the input of scan has sequences of different lengths. (Razvan, reported by Michael Forbes) - * Scan.infer_shape now works correctly when working with a condition for the number of loops. - In the past, it returned n_steps as the length, which is not always true. (Razvan) - * Scan.infer_shape crash fix. 
(Razvan) - -New features: - * AdvancedIncSubtensor grad defined and tested (Justin Bayer) - * Adding 1D advanced indexing support to inc_subtensor and set_subtensor (James Bergstra) - * tensor.{zeros,ones}_like now supports the dtype param as numpy (Frederic) - * Added configuration flag "exception_verbosity" to control the verbosity of exceptions (Ian) - * theano-cache list: list the content of the theano cache (Frederic) - * theano-cache unlock: remove the Theano cache lock (Olivier) - * tensor.ceil_int_div to compute ceil(a / float(b)) (Frederic) - * MaxAndArgMax.grad now works with any axis (The op supports only 1 axis) (Frederic) - * used by tensor.{max,min,max_and_argmax} - * tensor.{all,any} (Razvan) - * tensor.roll as numpy: (Matthew Rocklin, David Warde-Farley) - * Theano with GPU works in some cases on Windows now. Still experimental. (Sebastian Urban) - * IfElse now allows to have a list/tuple as the result of the if/else branches. - * They must have the same length and corresponding type (Razvan) - * Argmax output dtype is now int64 instead of int32. (Olivier) - * Added the element-wise operation arccos. (Ian) - * Added sparse dot with dense grad output. (Yann Dauphin) - * Optimized to Usmm and UsmmCscDense in some case (Yann) - * Note: theano.dot and theano.sparse.structured_dot() always had a gradient with the same sparsity pattern as the inputs. - The new theano.sparse.dot() has a dense gradient for all inputs. - * GpuAdvancedSubtensor1 supports broadcasted dimensions. (Frederic) - * TensorVariable.zeros_like() and SparseVariable.zeros_like() - * theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.device_properties() (Frederic) - * theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info() return free and total gpu memory (Frederic) - * Theano flags compiledir_format. Keep the same default as before: compiledir_%(platform)s-%(processor)s-%(python_version)s. (Josh Bleecher Snyder) - * We also support the "theano_version" substitution. - * IntDiv C code (faster and allows this elemwise to be fused with other elemwise) (Pascal) - * Internal filter_variable mechanism in Type. (Pascal, Ian) - * Ifelse works on sparse. - * It makes use of gpu shared variable more transparent with theano.function updates and givens parameter. - * Added a_tensor.transpose(axes) axes is optional (James) - * theano.tensor.transpose(a_tensor, kwargs) We were ignoring kwargs, now it is used as the axes. - * a_CudaNdarray_object[*] = int, now works (Frederic) - * tensor_variable.size (as numpy) computes the product of the shape elements. (Olivier) - * sparse_variable.size (as scipy) computes the number of stored values. (Olivier) - * sparse_variable[N, N] now works (Li Yao, Frederic) - * sparse_variable[M:N, O:P] now works (Li Yao, Frederic, Pascal) - M, N, O, and P can be Python int or scalar tensor variables, None, or - omitted (sparse_variable[:, :M] or sparse_variable[:M, N:] work). - * tensor.tensordot can now be moved to GPU (Sander Dieleman, - Pascal, based on code from Tijmen Tieleman's gnumpy, - http://www.cs.toronto.edu/~tijmen/gnumpy.html) - * Many infer_shape implemented on sparse matrices op. (David W.F.) - * Added theano.sparse.verify_grad_sparse to easily allow testing grad of - sparse op. It supports testing the full and structured gradients. - * The keys in our cache now store the hash of constants and not the constant values - themselves. This is significantly more efficient for big constant arrays. (Frederic B.) - * 'theano-cache list' lists key files bigger than 1M (Frederic B.) 
- * 'theano-cache list' prints an histogram of the number of keys per compiled module (Frederic B.) - * 'theano-cache list' prints the number of compiled modules per op class (Frederic B.) - * The Theano flag "nvcc.fastmath" is now also used for the cuda_ndarray.cu file. - * Add the header_dirs to the hard part of the compilation key. This is - currently used only by cuda, but if we use libraries that are only headers, - this can be useful. (Frederic B.) - * The Theano flag "nvcc.flags" is now included in the hard part of the key. - This means that now we recompile all modules for each value of "nvcc.flags". - A change in "nvcc.flags" used to be ignored for modules that were already - compiled. (Frederic B.) - * Alloc, GpuAlloc are not always pre-computed (constant_folding optimization) - at compile time if all their inputs are constant. - (Frederic B., Pascal L., reported by Sander Dieleman) - * New Op tensor.sort(), wrapping numpy.sort (Hani Almousli) - - -New optimizations: - * AdvancedSubtensor1 reuses preallocated memory if available (scan, c|py_nogc linker) (Frederic) - * dot22, dot22scalar work with complex. (Frederic) - * Generate Gemv/Gemm more often. (James) - * Remove scan when all computations can be moved outside the loop. (Razvan) - * scan optimization done earlier. This allows other optimizations to be applied. (Frederic, Guillaume, Razvan) - * exp(x) * sigmoid(-x) is now correctly optimized to the more stable form sigmoid(x). (Olivier) - * Added Subtensor(Rebroadcast(x)) => Rebroadcast(Subtensor(x)) optimization. (Guillaume) - * Made the optimization process faster. (James) - * Allow fusion of elemwise when the scalar op needs support code. (James) - * Better opt that lifts transpose around dot. (James) - - -Crashes fixed: - * T.mean crash at graph building time. (Ian) - * "Interactive debugger" crash fix. (Ian, Frederic) - * Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin) - * Optimization crash with gemm and complex. (Frederic) - * GPU crash with elemwise. (Frederic, some reported by Chris Currivan) - * Compilation crash with amdlibm and the GPU. (Frederic) - * IfElse crash. (Frederic) - * Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal) - * GPU compilation crash on MacOS X. (Olivier) - * Support for OSX Enthought Python Distribution 7.x. (Graham Taylor, Olivier) - * When the subtensor inputs had 0 dimensions and the outputs 0 dimensions. (Frederic) - * Crash when the step to subtensor was not 1 in conjunction with some optimization. (Frederic, reported by Olivier Chapelle) - * Runtime crash related to an optimization with subtensor of alloc (reported by Razvan, fixed by Frederic) - * Fix dot22scalar cast of integer scalars (Justin Bayer, Frédéric, Olivier) - * Fix runtime crash in gemm, dot22. FB - * Fix on 32 bit computer: make sure all shapes are int64. (Olivier) - * Fix to deque on python 2.4 (Olivier) - * Fix crash when not using C code (or using DebugMode) (not used by - default) with numpy 1.6*. Numpy has a bug in the reduction code that - made it crash. (Pascal) - * Crashes of blas functions (Gemv on CPU; Ger, Gemv and Gemm on GPU) - when matrices had non-unit stride in both dimensions (CPU and GPU), - or when matrices had negative strides (GPU only). In those cases, - we are now making copies. (Pascal) - * More cases supported in AdvancedIncSubtensor1. (Olivier D.) - * Fix crash when a broadcasted constant was used as input of an - elemwise Op and needed to be upcasted to match the op's output. 
- (Reported by John Salvatier, fixed by Pascal L.)
- * Fixed a memory leak with a shared variable (we kept a pointer to the original value) (Ian G.)
-
-
-Known bugs:
- * CAReduce with nan in its inputs does not return the correct output.
-   * This is used in tensor.{max,mean,prod,sum} and in the grad of PermuteRowElements.
-
-
-Sandbox:
- * cvm interface made more consistent with the current linker. (James)
-   * Now all tests pass with the linker=cvm flag.
- * vm linker has a callback parameter. (James)
- * review/finish/doc: diag/extract_diag. (Arnaud Bergeron, Frederic, Olivier)
- * review/finish/doc: AllocDiag/diag. (Arnaud, Frederic, Guillaume)
- * review/finish/doc: MatrixInverse, matrix_inverse. (Razvan)
- * review/finish/doc: matrix_dot. (Razvan)
- * review/finish/doc: det (determinant) op. (Philippe Hamel)
- * review/finish/doc: Cholesky determinant op. (David)
- * review/finish/doc: ensure_sorted_indices. (Li Yao)
- * review/finish/doc: spectral_radius_bound. (Xavier Glorot)
- * review/finish/doc: sparse sum. (Valentin Bisson)
- * review/finish/doc: Remove0 (Valentin)
- * review/finish/doc: SquareDiagonal (Eric)
-
-
-Sandbox New features (not enabled by default):
- * CURAND_RandomStreams for uniform and normal (not picklable, GPU only) (James)
- * New sandbox.linalg.ops.pinv (pseudo-inverse) op (Razvan)
-
-
-Documentation:
- * Many updates. (Many people)
- * Updates to the install doc on MacOS. (Olivier)
- * Updates to the install doc on Windows. (David, Olivier)
- * Doc on the Rop function (Ian)
- * Added how to use scan to loop with a condition controlling the number of iterations. (Razvan)
- * Added how to wrap an existing python function (from numpy, scipy, ...) in Theano. (Frederic)
- * Refactored the GPU installation doc of Theano. (Olivier)
-
-
-Others:
- * Better error messages in many places. (Many people)
- * PEP8 fixes. (Many people)
- * Add a warning about a numpy bug when using advanced indexing on a
-   tensor with more than 2**32 elements (the resulting array is not
-   correctly filled and ends with zeros). (Pascal, reported by David WF)
- * Added Scalar.ndim=0 and ScalarSharedVariable.ndim=0 (simplify code) (Razvan)
- * New min_informative_str() function to print graphs. (Ian)
- * Fix catching of exceptions. (Sometimes we used to catch interrupts) (Frederic, David, Ian, Olivier)
- * Better support for UTF strings. (David)
- * Fix pydotprint with a function compiled with a ProfileMode (Frederic)
-   * It was broken by a change to the profiler.
- * Warning when people have old cache entries. (Olivier)
- * More tests for join on the GPU and CPU. (Frederic)
- * Do not request to load the GPU module by default in the scan module. (Razvan)
- * Fixed some import problems. (Frederic and others)
- * Filtering update. (James)
- * On Windows, the default compiledir changed to be local to the
-   computer/user and not transferred with the roaming profile. (Sebastian
-   Urban)
- * New theano flag "on_shape_error". Defaults to "warn" (same as the previous behavior):
-   it prints a warning when an error occurs while inferring the shape of some apply node.
-   The other accepted value is "raise", to raise an error when this happens. (Frederic)
- * The buildbot now raises optimization/shape errors instead of just printing a warning. (Frederic)
- * Better pycuda tests (Frederic)
- * check_blas.py now accepts the shape and the number of iterations as parameters (Frederic)
- * Fix an opt warning when the ShapeOpt optimization is disabled (it is enabled by default) (Frederic)
- * More internal verification of what each op.infer_shape returns.
-   (Frederic, James)
- * Improved docstring and basic tests for the Tile Op (David).
-
-Reviewers (alphabetical order):
- * David, Frederic, Ian, James, Olivier, Razvan
-
-
-Theano 0.4.1 (12 August 2011)
-=============================
-
-New features:
-
- * R_op macro, working like theano.tensor.grad
-
-   * Not all tests are done yet (TODO)
- * Added aliases theano.tensor.bitwise_{and,or,xor,not}. They are the numpy names.
- * Updates returned by Scan (you need to pass them to theano.function) are now a new Updates class.
-   This allows more checks and makes them easier to work with. The Updates class is a subclass of dict.
- * Scan can now work in a "do while" loop style.
-
-   * We scan until a condition is met.
-   * There is a minimum of 1 iteration (a "while do" style loop is not possible).
- * The "Interactive Debugger" (compute_test_value theano flag)
-
-   * Now it should work with all ops (even the ones with only C code).
-   * In the past some errors were caught and re-raised as unrelated errors (ShapeMismatch replaced with NotImplemented). We don't do that anymore.
- * The new Op.make_thunk function (introduced in 0.4.0) is now used by constant_folding and DebugMode.
- * Added A_TENSOR_VARIABLE.astype() as a way to cast. NumPy allows this syntax.
- * New BLAS GER implementation.
- * Insert GEMV more frequently.
- * Added a new ifelse(scalar condition, rval_if_true, rval_if_false) Op.
-
-   * This is a subset of the elemwise switch (tensor condition, rval_if_true, rval_if_false).
-   * With the new feature in the sandbox, only one of rval_if_true or rval_if_false will be evaluated.
-
-Optimizations:
-
- * Subtensor has C code
- * {Inc,Set}Subtensor has C code
- * ScalarFromTensor has C code
- * dot(zeros,x) and dot(x,zeros)
- * IncSubtensor(x, zeros, idx) -> x
- * SetSubtensor(x, x[idx], idx) -> x (when x is a constant)
- * subtensor(alloc,...) -> alloc
- * Many new scan optimizations
-
-   * Lower scan execution overhead with a Cython implementation
-   * Removed scan double compilation (by using the new Op.make_thunk mechanism)
-   * Certain computations from the inner graph are now pushed out into the outer
-     graph. This means they are not re-computed at every step of scan.
-   * Different scan ops now get merged into a single op (if possible), reducing
-     the overhead and sharing computations between the two instances
-
-GPU:
-
- * PyCUDA/CUDAMat/Gnumpy/Theano bridge and its documentation.
-
-   * New function to easily convert pycuda GPUArray objects to and from CudaNdarray objects
-   * Fixed a bug when you created a view of a manually created CudaNdarray that is a view of a GPUArray.
- * Removed a warning when nvcc is not available and the user did not request it.
- * Renamed config option cuda.nvccflags -> nvcc.flags
- * Allow GpuSoftmax and GpuSoftmaxWithBias to work with bigger inputs.
-
-Bugs fixed:
-
- * In one case an AdvancedSubtensor1 could be converted to a GpuAdvancedIncSubtensor1 instead of GpuAdvancedSubtensor1.
-   It probably didn't happen due to the order of optimizations, but that order is not guaranteed to be the same on all computers.
- * Derivative of set_subtensor was wrong.
- * Derivative of Alloc was wrong.
-
-Crash fixed:
-
- * On an unusual Python 2.4.4 on Windows
- * When using a C cache copied from another location
- * On 32-bit Windows, when setting a complex64 to 0.
- * Compilation crash with CUDA 4
- * When wanting to copy the compilation cache from one computer to another
-
-   * This can be useful for using Theano on a computer without a compiler.
- * GPU:
-
-   * Compilation crash fixed under Ubuntu 11.04
-   * Compilation crash fixed with CUDA 4.0
-
-Known bug:
-
- * CAReduce with nan in its inputs does not return the correct output.
-
-   * This is used in tensor.{max,mean,prod,sum} and in the grad of PermuteRowElements.
-   * This is not a new bug, just a bug discovered since the last release that we didn't have time to fix.
-
-Deprecation (will be removed in Theano 0.5, warning generated if you use them):
-
- * The string mode (accepted only by theano.function()) FAST_RUN_NOGC. Use Mode(linker='c|py_nogc') instead.
- * The string mode (accepted only by theano.function()) STABILIZE. Use Mode(optimizer='stabilize') instead.
- * scan interface change:
-
-   * The use of `return_steps` for specifying how many entries of the output
-     to return has been deprecated.
-
-     * The same thing can be done by applying a subtensor to the output
-       returned by scan to select a certain slice.
-   * The inner function (that scan receives) should return its outputs and
-     updates following this order:
-
-     [outputs], [updates], [condition]. One can skip any of the three if not
-     used, but the order has to stay unchanged.
- * tensor.grad(cost, wrt) will return an object of the "same type" as wrt
-   (list/tuple/TensorVariable).
-
-   * Currently, tensor.grad returns a list when wrt is a list/tuple of
-     more than 1 element.
-
-Deprecated in 0.4.0 (reminder; a warning is generated if you use them):
-
- * Dividing integers with / is deprecated: use // for integer division, or
-   cast one of the integers to a float type if you want a float result (you may
-   also change this behavior with config.int_division).
- * tag.shape attribute deprecated (#633)
- * CudaNdarray_new_null is deprecated in favour of CudaNdarray_New
-
-Sandbox:
-
- * MRG random generator now implements the same casting behavior as the regular random generator.
-
-Sandbox New features (not enabled by default):
-
- * New Linkers (theano flag linker={vm,cvm})
-
-   * The new linker allows lazy evaluation of the new ifelse op, meaning we compute only the true or false branch depending on the condition. This can speed up some types of computation.
-   * Uses a new profiling system (that currently tracks less stuff)
-   * The cvm is implemented in C, so it lowers Theano's overhead.
-   * The vm is implemented in python, so it can help debugging in some cases.
-   * In the future, the default will be the cvm.
- * Some new, not yet well tested sparse ops: theano.sparse.sandbox.{SpSum, Diag, SquareDiagonal, ColScaleCSC, RowScaleCSC, Remove0, EnsureSortedIndices, ConvolutionIndices}
-
-Documentation:
-
- * How to compute the Jacobian, Hessian, Jacobian times a vector, and Hessian times a vector.
-
- * Slides for a 3-hour class with exercises that was given at the HPCS2011 Conference in Montreal.
-
-Others:
-
- * Logger names renamed to be consistent.
- * Logger functions simplified and made more consistent.
- * Fixed errors being transformed into other, unrelated errors when the compute_test_value Theano flag is used.
- * Compilation cache enhancements.
- * Made compatible with NumPy 1.6 and SciPy 0.9
- * Fix tests when there were new dtypes in NumPy that are not supported by Theano.
- * Fixed some tests when SciPy is not available.
- * Don't compile anything when Theano is imported. Compile support code when we compile the first C code.
- * Python 2.4 fix:
-
-   * Fix the file theano/misc/check_blas.py
-   * For python 2.4.4 on Windows, replaced float("inf") with numpy.inf.
- * Removes useless inputs to a scan node
-
-   * Mostly beautification, making the graph more readable. Such inputs would appear as a consequence of other optimizations.
-
-Core:
-
- * There is a new mechanism that lets an Op declare that one of its
-   inputs may be aliased to another, destroyed input. This will generally
-   result in incorrect calculations, so it should be used with care! The
-   right way to use it is when the caller can guarantee that even if
-   these two inputs look aliased, they actually will never overlap. This
-   mechanism can be used, for example, by a new alternative approach to
-   implementing Scan. If an op has an attribute called
-   "destroyhandler_tolerate_aliased", then this is what's going on.
-   IncSubtensor is thus far the only Op to use this mechanism.
-
-Theano 0.4.0 (2011-06-13)
-=========================
-
-Change in output memory storage for Ops:
-   If you implemented custom Ops, with either a C or Python implementation,
-   this will concern you.
-
-   The contract for memory storage of Ops has been changed. In particular,
-   it is no longer guaranteed that output memory buffers are either empty,
-   or allocated by a previous execution of the same Op.
-
-   Right now, here is the situation:
-   * For Python implementations (perform), what is inside output_storage
-     may have been allocated from outside the perform() function, for
-     instance by another node (e.g., Scan) or the Mode. If that was the
-     case, the memory can be assumed to be C-contiguous (for the moment).
-   * For C implementations (c_code), nothing has changed yet.
-
-   In a future version, the content of the output storage, both for Python and C
-   versions, will either be NULL, or have the following guarantees:
-   * It will be a Python object of the appropriate Type (for a Tensor variable,
-     a numpy.ndarray; for a GPU variable, a CudaNdarray, for instance)
-   * It will have the correct number of dimensions and the correct dtype.
-   However, its shape and memory layout (strides) will not be guaranteed.
-
-   When that change is made, the config flag DebugMode.check_preallocated_output
-   will help you find implementations that are not up-to-date.
-
-Deprecation:
- * tag.shape attribute deprecated (#633)
- * CudaNdarray_new_null is deprecated in favour of CudaNdarray_New
- * Dividing integers with / is deprecated: use // for integer division, or
-   cast one of the integers to a float type if you want a float result (you may
-   also change this behavior with config.int_division).
- * Removed (already deprecated) sandbox/compile module
- * Removed (already deprecated) incsubtensor and setsubtensor functions;
-   inc_subtensor and set_subtensor are to be used instead.
-
-Bugs fixed:
- * In CudaNdarray.__{iadd,idiv}__, when it is not implemented, return the error.
- * THEANO_FLAGS='optimizer=None' now works as expected
- * Fixed memory leak in error handling on GPU-to-host copy
- * Fix relating specifically to Python 2.7 on Mac OS X
- * infer_shape can now handle Python longs
- * Trying to compute x % y with one or more arguments being complex now
-   raises an error.
- * The output of random samples computed with uniform(..., dtype=...) is
-   guaranteed to be of the specified dtype instead of potentially being of a
-   higher-precision dtype.
- * The perform() method of DownsampleFactorMax did not give the right result
-   when reusing output storage. This happens only if you use the Theano flag
-   'linker=c|py_nogc' or manually specify the mode to be 'c|py_nogc'.
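- To make the output-storage contract described at the top of these 0.4.0 notes more
- concrete, here is a minimal, hypothetical perform() sketch (the doubling Op and all
- names below are ours, not part of Theano):
-
-     import numpy
-
-     def perform(self, node, inputs, output_storage):
-         x, = inputs
-         out = output_storage[0]
-         # out[0] may be None, or a buffer preallocated by another node or the Mode;
-         # reuse it only when the shape and dtype match, otherwise allocate fresh memory.
-         if out[0] is None or out[0].shape != x.shape or out[0].dtype != x.dtype:
-             out[0] = numpy.empty_like(x)
-         out[0][...] = 2 * x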
- -Crash fixed: - * Work around a bug in gcc 4.3.0 that make the compilation of 2d convolution - crash. - * Some optimizations crashed when the "ShapeOpt" optimization was disabled. - -Optimization: - * Optimize all subtensor followed by subtensor. - -GPU: - * Move to the gpu fused elemwise that have other dtype then float32 in them - (except float64) if the input and output are float32. - * This allow to move elemwise comparisons to the GPU if we cast it to - float32 after that. - * Implemented CudaNdarray.ndim to have the same interface in ndarray. - * Fixed slowdown caused by multiple chained views on CudaNdarray objects - * CudaNdarray_alloc_contiguous changed so as to never try to free - memory on a view: new "base" property - * Safer decref behaviour in CudaNdarray in case of failed allocations - * New GPU implementation of tensor.basic.outer - * Multinomial random variates now available on GPU - -New features: - * ProfileMode - * profile the scan overhead - * simple hook system to add profiler - * reordered the output to be in the order of more general to more specific - * DebugMode now checks Ops with different patterns of preallocated memory, - configured by config.DebugMode.check_preallocated_output. - * var[vector of index] now work, (grad work recursively, the direct grad - work inplace, gpu work) - * limitation: work only of the outer most dimensions. - * New way to test the graph as we build it. Allow to easily find the source - of shape mismatch error: - `http://deeplearning.net/software/theano/tutorial/debug_faq.html#interactive-debugger`__ - * cuda.root inferred if nvcc is on the path, otherwise defaults to - /usr/local/cuda - * Better graph printing for graphs involving a scan subgraph - * Casting behavior can be controlled through config.cast_policy, - new (experimental) mode. - * Smarter C module cache, avoiding erroneous usage of the wrong C - implementation when some options change, and avoiding recompiling the - same module multiple times in some situations. - * The "theano-cache clear" command now clears the cache more thoroughly. - * More extensive linear algebra ops (CPU only) that wrap scipy.linalg - now available in the sandbox. - * CUDA devices 4 - 16 should now be available if present. - * infer_shape support for the View op, better infer_shape support in Scan - * infer_shape supported in all case of subtensor - * tensor.grad now gives an error by default when computing the gradient - wrt a node that is disconnected from the cost (not in the graph, or - no continuous path from that op to the cost). - * New tensor.isnan and isinf functions. - -Documentation: - * Better commenting of cuda_ndarray.cu - * Fixes in the scan documentation: add missing declarations/print statements - * Better error message on failed __getitem__ - * Updated documentation on profile mode - * Better documentation of testing on Windows - * Better documentation of the 'run_individual_tests' script - -Unit tests: - * More strict float comparaison by default - * Reuse test for subtensor of tensor for gpu tensor(more gpu test) - * Tests that check for aliased function inputs and assure appropriate copying - (#374) - * Better test of copies in CudaNdarray - * New tests relating to the new base pointer requirements - * Better scripts to run tests individually or in batches - * Some tests are now run whenever cuda is available and not just when it has - been enabled before - * Tests display less pointless warnings. 
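- As another editor's aside, a minimal sketch (assuming the 0.4.0-era API; the names
- are ours) of the "var[vector of index]" feature and its gradient mentioned in the
- New features list above:
-
-     import numpy
-     import theano
-     import theano.tensor as T
-
-     x = T.vector('x')
-     idx = T.ivector('idx')
-     y = x[idx].sum()              # advanced indexing on the outermost dimension
-     g = T.grad(y, x)              # the gradient works through the indexing
-     f = theano.function([x, idx], g)
-     print(f(numpy.zeros(5, dtype=theano.config.floatX),
-             numpy.array([0, 2, 2], dtype='int32')))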
-
-Other:
- * Correctly set the broadcast flag to True in the output variable of
-   a Reshape op when we receive an int 1 in the new shape.
- * pydotprint: high contrast mode is now the default, with an option to print
-   more compact node names.
- * pydotprint: Now truncates labels that are too long.
- * More compact printing (ignore leading "Composite" in op names)
-
-
-Theano 0.3.1 (2011-02-21)
-=========================
-
-Deprecation:
- * The theano shared variable attribute `value` is deprecated, use `get_value()` or `set_value()`!
-   See http://deeplearning.net/software/theano/tutorial/aliasing.html
-
-Bugs fixed:
- * The random number generator in theano/sandbox/rng_mrg.py did not always return the same sequence of numbers on the CPU and GPU.
-   * In some cases, there was a (possibly large) fraction of non-random garbage in the returned sequence.
-
- * In python mode (not the default mode), when the input of an elemwise operation was an empty ndarray, we were not returning an empty ndarray.
- * Scan cached the number of steps. This caused no problem because each time you called scan the number of steps would get refreshed.
-   The problem was when you called ScanGrad, which would use the cached number of steps without refreshing it.
-   To be affected by this bug, one would have to compile two graphs, one containing a Scan and the other the corresponding GradScan,
-   call the first function to cache the number of steps, and then call the second function with a different number of steps.
- * In GpuConv, errors in conv_patch_stack_reduce when the entire kernel doesn't fit into shared memory.
-   The error was not found before as the impact was less than the relative tolerance of 1e-3. Now the relative tolerance is 1e-5.
-
-Crash fixed:
- * Avoid an exception that made Theano crash when taking the gradient of DimShuffle in some particular cases.
- * Compilation crash for GpuElemwise with tensors with a high number of dimensions (~6 or more).
- * Disabled the C code generator that made gcc crash on complex types.
- * Crash in optimization when an Op has no input.
- * Output shape is now computed correctly for matrix-vector multiplication on GPU.
- * In Scan, when using numbers as inputs, not symbolic variables.
- * In GradScan, when there is only 1 input in the Scan.
- * In GpuSum, bug in the calculation of n_blocks for the 10 pattern. (Sum over the rows of a matrix)
- * Some segfaults at exit with GPU code.
-
-Optimization:
- * New SpecifyShape op that allows passing more shape info in the graph.
- * Speed up gemv by working around scipy's gemv slowness when the matrix is in C order (the default).
- * Remove join of only 1 element.
- * During optimization, consider one more case in get_constant_value.
-
-GPU:
- * cuda_shared.value = X now works inplace!
- * cuda_shared_var.set_value(new_ndarray) will overwrite the old value inplace in the most common case.
- * Allow creating a CudaNdarraySharedVariable from a CudaNdarray.
- * New init_gpu_device theano flag.
- * Fuse GpuElemwise more often (in the case where there are so many inputs that fusing them all would exceed the 256-byte limit on parameters to a GPU function).
- * CPU join of only 1 element that was not moved to the GPU.
-
-New features:
- * tensor.reshape now makes dimensions of length 1 broadcastable.
- * tensor.prod now implements the gradient.
- * DebugMode now warns if an Op declared itself as returning a view of the input but did not do so.
-   * This behaviour is a problem, because it can block other Ops from being inplace on the same inputs.
This could lower the reuse of memory. - * Sparse.structured_dot now works when both matrices are sparse - * Sparse type is now supported by the shape op, and the ShapeFeature optimizer works correctly with them. - * New 3D convolution ops, with CPU and GPU implementations. - * New colors in pydotprint. - -Documentation: - * Documented lib.amdlibm and (new) init_gpu_device config variables. - * A new page (was done for 0.3 but an error was hiding it on the web page) on the memory aliasing contract of Theano. - * Revision to the Windows installation instructions. - * The cuda documentation is now generated on the web server. - * Better documentation of .theanorc and its sections. - -Unit tests: - * Stop usage of deprecated functions or syntax in the unit tests. - * Better testing of GPU convolution nets. - * Make more tests able to use different random seeds. - * Tests of sparse now use default mode, not a hard-coded one. - * Remove some tests of unimplemented features. - -Other: - * The name of compiledir now includes the Python version to make it easier for people with many Python versions - * Added theano.tensor.std as a shortcut to sqrt(var(input=input, axis=axis)). - * Whitespace, tabulation and indentation clean-up in the code. - * Better detection of memory sharing between variables. - - -Theano 0.3 (2010-11-23) -======================= - -This is the first major release of Theano since 0.1. Version 0.2 development started internally but it was never advertised as a release. - -There have been so many changes since 0.1 that we have lost track of many of them. Below is a *partial* list of changes since 0.1. - - * GPU code using NVIDIA's CUDA framework is now generated for many Ops. - * Some interface changes since 0.1: - * A new "shared variable" system to allow reusing memory space between Theano functions. - * A new memory contract has been formally written for Theano, for people who want to minimize memory copies. - * The old module system has been deprecated. - * By default, inputs to a Theano function will not be silently downcasted (e.g. from float64 to float32). - * An error is now raised when using the result of logical operation on Theano variable in an 'if' (i.e. an implicit call to __nonzeros__). - * An error is now raised when we receive a non-aligned ndarray as input to a function (this is not supported). - * An error is raised when the list of dimensions passed to dimshuffle() contains duplicates or is otherwise not sensible. - * Call NumPy BLAS bindings for gemv operations in addition to the already supported gemm. - * If gcc is unavailable at import time, Theano now falls back to a Python-based emulation mode after raising a warning. - * An error is now raised when tensor.grad is called on a non-scalar Theano variable (in the past we would implicitly do a sum on the tensor to make it a scalar). - * Added support for "erf" and "erfc" functions. - * The current default value of the parameter axis of theano.{max,min,argmax,argmin,max_and_argmax} is deprecated. We now use the default NumPy behavior of operating on the entire tensor. - * Theano is now available from PyPI and installable through "easy_install" or "pip". - - -Theano 0.1 -========== - -*Release date: 2009-04-02* - -What works ----------- - -- building symbolic expression. -- arranging symbolic expressions into Modules so that multiple functions - can work on the same data. -- symbolic gradient descent. -- graph optimization. -- compilation to C for many kinds of expression. 
-- a debugging mode that checks that your expression results are correct, - using a variety of sanity checks. - -What's missing? ---------------- - -- An algorithm library. We're missing a library of examples and standard - component implementations. Some examples will find their way into - the Theano repo, but standard algorithms will go into the 'pylearn' - project (toolbox style). Now that we have a stable foundation, we - can reach a consensus on style for algorithms. diff --git a/MANIFEST.in b/MANIFEST.in index f07f4ca86d..67628ca9bc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,8 +7,8 @@ global-include *.h global-include *.sh global-include *.pkl recursive-include doc -include bin/theano-cache +include bin/aesara-cache prune .jenkins prune .travis include versioneer.py -include theano/_version.py +include aesara/_version.py diff --git a/Makefile b/Makefile index cda58775e4..e7234778d9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ .PHONY: help venv conda docker check-docstyle check-format check-style format style test lint check coverage pypi .DEFAULT_GOAL = help -PROJECT_NAME = theano -PROJECT_DIR = theano/ +PROJECT_NAME = aesara +PROJECT_DIR = aesara/ PYTHON = python PIP = pip CONDA = conda diff --git a/NEWS.txt b/NEWS.txt deleted file mode 100644 index 1c6a9b7a2b..0000000000 --- a/NEWS.txt +++ /dev/null @@ -1,12 +0,0 @@ -============= -Release Notes -============= - -Theano-PyMC 1.0.5 (on deck) -========================================= - -Highlights (since 1.0.4): - - - First release under new name Theano-PyMC - -A total of x people contributed to this release since ``1.0.4``: diff --git a/NEWS_DEV.txt b/NEWS_DEV.txt deleted file mode 100644 index db0d344632..0000000000 --- a/NEWS_DEV.txt +++ /dev/null @@ -1,414 +0,0 @@ -.. _NEWS_DEV: - -=================== -DRAFT Release Notes -=================== - -git log -p rel-1.0.1... |grep Merge|grep '#[0123456789]' |cut -f 8 -d ' ' | sed 's\#\* https://github.com/Theano/Theano/pull/\' - -# Commit count per user -git shortlog -sn rel-1.0.1.. - - -Highlights: - - ... - -Interface changes: - - ... - -Convolution updates: - - ... - -GPU: - - ... - - - cuDNN support - - ... - -New features: - - ... - -Others: - - ... - -Other more detailed changes: - - ... 
- - -PULL REQUESTS CHECKED FOR 1.0.1 SINCE 1.0.0 -* https://github.com/Theano/Theano/pull/6530 -* https://github.com/Theano/Theano/pull/6513 -* https://github.com/Theano/Theano/pull/6520 -* https://github.com/Theano/Theano/pull/6525 -* https://github.com/Theano/Theano/pull/6517 -* https://github.com/Theano/Theano/pull/6506 -* https://github.com/Theano/Theano/pull/6512 - -ALL THE PR BELLOW HAVE BEEN CHECKED FOR FINAL RELEASE 1.0.0 SINCE 0.9.0 -* https://github.com/Theano/Theano/pull/6509 -* https://github.com/Theano/Theano/pull/6508 -* https://github.com/Theano/Theano/pull/6505 -* https://github.com/Theano/Theano/pull/6496 -* https://github.com/Theano/Theano/pull/6495 -* https://github.com/Theano/Theano/pull/6492 -* https://github.com/Theano/Theano/pull/6489 -* https://github.com/Theano/Theano/pull/6488 -* https://github.com/Theano/Theano/pull/6490 -* https://github.com/Theano/Theano/pull/5932 -* https://github.com/Theano/Theano/pull/6479 -* https://github.com/Theano/Theano/pull/6401 -* https://github.com/Theano/Theano/pull/6472 -* https://github.com/Theano/Theano/pull/6477 -* https://github.com/Theano/Theano/pull/6475 -* https://github.com/Theano/Theano/pull/6468 -* https://github.com/Theano/Theano/pull/6467 -* https://github.com/Theano/Theano/pull/6469 -* https://github.com/Theano/Theano/pull/6466 -* https://github.com/Theano/Theano/pull/6460 -* https://github.com/Theano/Theano/pull/6459 -* https://github.com/Theano/Theano/pull/6457 -* https://github.com/Theano/Theano/pull/6456 -* https://github.com/Theano/Theano/pull/6453 -* https://github.com/Theano/Theano/pull/6452 -* https://github.com/Theano/Theano/pull/6430 -* https://github.com/Theano/Theano/pull/6447 -* https://github.com/Theano/Theano/pull/6446 -* https://github.com/Theano/Theano/pull/6431 -* https://github.com/Theano/Theano/pull/6445 -* https://github.com/Theano/Theano/pull/6348 -* https://github.com/Theano/Theano/pull/6416 -* https://github.com/Theano/Theano/pull/6443 -* https://github.com/Theano/Theano/pull/6440 -* https://github.com/Theano/Theano/pull/6388 -* https://github.com/Theano/Theano/pull/5641 -* https://github.com/Theano/Theano/pull/6367 -* https://github.com/Theano/Theano/pull/6437 -* https://github.com/Theano/Theano/pull/6439 -* https://github.com/Theano/Theano/pull/6425 -* https://github.com/Theano/Theano/pull/6434 -* https://github.com/Theano/Theano/pull/5959 -* https://github.com/Theano/Theano/pull/6005 -* https://github.com/Theano/Theano/pull/6427 -* https://github.com/Theano/Theano/pull/6424 -* https://github.com/Theano/Theano/pull/6419 -* https://github.com/Theano/Theano/pull/6415 -* https://github.com/Theano/Theano/pull/6418 -* https://github.com/Theano/Theano/pull/5891 -* https://github.com/Theano/Theano/pull/6316 -* https://github.com/Theano/Theano/pull/6331 -* https://github.com/Theano/Theano/pull/6100 -* https://github.com/Theano/Theano/pull/6412 -* https://github.com/Theano/Theano/pull/6221 -* https://github.com/Theano/Theano/pull/6386 -* https://github.com/Theano/Theano/pull/6411 -* https://github.com/Theano/Theano/pull/6405 -* https://github.com/Theano/Theano/pull/6410 -* https://github.com/Theano/Theano/pull/6413 -* https://github.com/Theano/Theano/pull/6389 -* https://github.com/Theano/Theano/pull/6409 -* https://github.com/Theano/Theano/pull/6406 -* https://github.com/Theano/Theano/pull/6396 -* https://github.com/Theano/Theano/pull/6392 -* https://github.com/Theano/Theano/pull/6393 -* https://github.com/Theano/Theano/pull/6384 -* https://github.com/Theano/Theano/pull/6326 -* 
https://github.com/Theano/Theano/pull/6317 -* https://github.com/Theano/Theano/pull/6269 -* https://github.com/Theano/Theano/pull/5688 -* https://github.com/Theano/Theano/pull/6376 -* https://github.com/Theano/Theano/pull/6377 -* https://github.com/Theano/Theano/pull/6355 -* https://github.com/Theano/Theano/pull/6373 -* https://github.com/Theano/Theano/pull/6374 -* https://github.com/Theano/Theano/pull/6371 -* https://github.com/Theano/Theano/pull/6362 -* https://github.com/Theano/Theano/pull/6368 -* https://github.com/Theano/Theano/pull/6339 -* https://github.com/Theano/Theano/pull/6366 -* https://github.com/Theano/Theano/pull/6364 -* https://github.com/Theano/Theano/pull/6349 -* https://github.com/Theano/Theano/pull/6361 -* https://github.com/Theano/Theano/pull/6356 -* https://github.com/Theano/Theano/pull/6359 -* https://github.com/Theano/Theano/pull/6286 -* https://github.com/Theano/Theano/pull/6357 -* https://github.com/Theano/Theano/pull/6354 -* https://github.com/Theano/Theano/pull/6336 -* https://github.com/Theano/Theano/pull/6351 -* https://github.com/Theano/Theano/pull/6301 -* https://github.com/Theano/Theano/pull/6333 -* https://github.com/Theano/Theano/pull/6341 -* https://github.com/Theano/Theano/pull/6332 -* https://github.com/Theano/Theano/pull/6319 -* https://github.com/Theano/Theano/pull/6302 -* https://github.com/Theano/Theano/pull/6300 -* https://github.com/Theano/Theano/pull/6323 -* https://github.com/Theano/Theano/pull/6324 -* https://github.com/Theano/Theano/pull/5817 -* https://github.com/Theano/Theano/pull/6312 -* https://github.com/Theano/Theano/pull/6061 -* https://github.com/Theano/Theano/pull/6305 -* https://github.com/Theano/Theano/pull/6059 -* https://github.com/Theano/Theano/pull/6315 -* https://github.com/Theano/Theano/pull/6295 -* https://github.com/Theano/Theano/pull/6252 -* https://github.com/Theano/Theano/pull/6267 -* https://github.com/Theano/Theano/pull/6207 -* https://github.com/Theano/Theano/pull/6309 -* https://github.com/Theano/Theano/pull/6307 -* https://github.com/Theano/Theano/pull/6000 -* https://github.com/Theano/Theano/pull/6293 -* https://github.com/Theano/Theano/pull/6292 -* https://github.com/Theano/Theano/pull/6299 -* https://github.com/Theano/Theano/pull/6143 -* https://github.com/Theano/Theano/pull/6296 -* https://github.com/Theano/Theano/pull/6280 -* https://github.com/Theano/Theano/pull/6289 -* https://github.com/Theano/Theano/pull/6285 -* https://github.com/Theano/Theano/pull/6275 -* https://github.com/Theano/Theano/pull/6218 -* https://github.com/Theano/Theano/pull/6271 -* https://github.com/Theano/Theano/pull/6253 -* https://github.com/Theano/Theano/pull/6273 -* https://github.com/Theano/Theano/pull/6262 -* https://github.com/Theano/Theano/pull/6214 -* https://github.com/Theano/Theano/pull/6264 -* https://github.com/Theano/Theano/pull/6256 -* https://github.com/Theano/Theano/pull/6254 -* https://github.com/Theano/Theano/pull/6220 -* https://github.com/Theano/Theano/pull/5949 -* https://github.com/Theano/Theano/pull/6243 -* https://github.com/Theano/Theano/pull/6250 -* https://github.com/Theano/Theano/pull/6225 -* https://github.com/Theano/Theano/pull/6242 -* https://github.com/Theano/Theano/pull/6213 -* https://github.com/Theano/Theano/pull/6199 -* https://github.com/Theano/Theano/pull/6209 -* https://github.com/Theano/Theano/pull/6216 -* https://github.com/Theano/Theano/pull/6215 -* https://github.com/Theano/Theano/pull/6182 -* https://github.com/Theano/Theano/pull/6194 -* https://github.com/Theano/Theano/pull/6190 -* 
https://github.com/Theano/Theano/pull/6146 -* https://github.com/Theano/Theano/pull/6201 -* https://github.com/Theano/Theano/pull/6150 -* https://github.com/Theano/Theano/pull/6204 -* https://github.com/Theano/Theano/pull/6166 -* https://github.com/Theano/Theano/pull/6174 -* https://github.com/Theano/Theano/pull/6205 -* https://github.com/Theano/Theano/pull/6183 -* https://github.com/Theano/Theano/pull/6186 -* https://github.com/Theano/Theano/pull/6203 -* https://github.com/Theano/Theano/pull/6161 -* https://github.com/Theano/Theano/pull/6164 -* https://github.com/Theano/Theano/pull/6050 -* https://github.com/Theano/Theano/pull/6178 -* https://github.com/Theano/Theano/pull/6180 -* https://github.com/Theano/Theano/pull/6173 -* https://github.com/Theano/Theano/pull/6170 -* https://github.com/Theano/Theano/pull/6092 -* https://github.com/Theano/Theano/pull/6163 -* https://github.com/Theano/Theano/pull/6171 -* https://github.com/Theano/Theano/pull/6169 -* https://github.com/Theano/Theano/pull/6165 -* https://github.com/Theano/Theano/pull/5914 -* https://github.com/Theano/Theano/pull/5775 -* https://github.com/Theano/Theano/pull/6147 -* https://github.com/Theano/Theano/pull/6159 -* https://github.com/Theano/Theano/pull/6156 -* https://github.com/Theano/Theano/pull/6154 -* https://github.com/Theano/Theano/pull/5991 -* https://github.com/Theano/Theano/pull/6149 -* https://github.com/Theano/Theano/pull/6151 -* https://github.com/Theano/Theano/pull/6116 -* https://github.com/Theano/Theano/pull/6111 -* https://github.com/Theano/Theano/pull/6139 -* https://github.com/Theano/Theano/pull/6097 -* https://github.com/Theano/Theano/pull/6070 -* https://github.com/Theano/Theano/pull/6148 -* https://github.com/Theano/Theano/pull/6140 -* https://github.com/Theano/Theano/pull/6138 -* https://github.com/Theano/Theano/pull/5881 -* https://github.com/Theano/Theano/pull/6130 -* https://github.com/Theano/Theano/pull/6044 -* https://github.com/Theano/Theano/pull/6060 -* https://github.com/Theano/Theano/pull/6109 -* https://github.com/Theano/Theano/pull/6119 -* https://github.com/Theano/Theano/pull/6123 -* https://github.com/Theano/Theano/pull/6117 -* https://github.com/Theano/Theano/pull/6120 -* https://github.com/Theano/Theano/pull/5747 -* https://github.com/Theano/Theano/pull/6087 -* https://github.com/Theano/Theano/pull/6108 -* https://github.com/Theano/Theano/pull/6112 -* https://github.com/Theano/Theano/pull/6106 -* https://github.com/Theano/Theano/pull/6107 -* https://github.com/Theano/Theano/pull/6105 -* https://github.com/Theano/Theano/pull/6102 -* https://github.com/Theano/Theano/pull/6101 -* https://github.com/Theano/Theano/pull/6077 -* https://github.com/Theano/Theano/pull/6085 -* https://github.com/Theano/Theano/pull/6091 -* https://github.com/Theano/Theano/pull/6013 -* https://github.com/Theano/Theano/pull/6088 -* https://github.com/Theano/Theano/pull/6069 -* https://github.com/Theano/Theano/pull/6084 -* https://github.com/Theano/Theano/pull/6083 -* https://github.com/Theano/Theano/pull/6081 -* https://github.com/Theano/Theano/pull/6072 -* https://github.com/Theano/Theano/pull/6045 -* https://github.com/Theano/Theano/pull/6082 -* https://github.com/Theano/Theano/pull/6049 -* https://github.com/Theano/Theano/pull/6076 -* https://github.com/Theano/Theano/pull/6062 -* https://github.com/Theano/Theano/pull/6041 -* https://github.com/Theano/Theano/pull/6057 -* https://github.com/Theano/Theano/pull/6055 -* https://github.com/Theano/Theano/pull/6056 -* https://github.com/Theano/Theano/pull/6043 -* 
https://github.com/Theano/Theano/pull/6032 -* https://github.com/Theano/Theano/pull/6030 -* https://github.com/Theano/Theano/pull/5942 -* https://github.com/Theano/Theano/pull/6025 -* https://github.com/Theano/Theano/pull/6038 -* https://github.com/Theano/Theano/pull/6034 -* https://github.com/Theano/Theano/pull/6012 -* https://github.com/Theano/Theano/pull/6029 -* https://github.com/Theano/Theano/pull/6015 -* https://github.com/Theano/Theano/pull/6027 -* https://github.com/Theano/Theano/pull/6026 -* https://github.com/Theano/Theano/pull/5980 -* https://github.com/Theano/Theano/pull/6021 -* https://github.com/Theano/Theano/pull/6022 -* https://github.com/Theano/Theano/pull/6011 -* https://github.com/Theano/Theano/pull/5935 -* https://github.com/Theano/Theano/pull/5955 -* https://github.com/Theano/Theano/pull/6009 -* https://github.com/Theano/Theano/pull/5016 -* https://github.com/Theano/Theano/pull/5794 -* https://github.com/Theano/Theano/pull/5996 -* https://github.com/Theano/Theano/pull/5923 -* https://github.com/Theano/Theano/pull/5993 -* https://github.com/Theano/Theano/pull/5983 -* https://github.com/Theano/Theano/pull/5964 -* https://github.com/Theano/Theano/pull/5940 -* https://github.com/Theano/Theano/pull/5915 -* https://github.com/Theano/Theano/pull/5989 -* https://github.com/Theano/Theano/pull/5988 -* https://github.com/Theano/Theano/pull/5987 -* https://github.com/Theano/Theano/pull/5908 -* https://github.com/Theano/Theano/pull/5974 -* https://github.com/Theano/Theano/pull/5965 -* https://github.com/Theano/Theano/pull/5960 -* https://github.com/Theano/Theano/pull/5957 -* https://github.com/Theano/Theano/pull/5936 -* https://github.com/Theano/Theano/pull/5950 -* https://github.com/Theano/Theano/pull/5948 -* https://github.com/Theano/Theano/pull/5946 -* https://github.com/Theano/Theano/pull/5947 -* https://github.com/Theano/Theano/pull/5927 -* https://github.com/Theano/Theano/pull/5944 -* https://github.com/Theano/Theano/pull/5918 -* https://github.com/Theano/Theano/pull/5941 -* https://github.com/Theano/Theano/pull/5931 -* https://github.com/Theano/Theano/pull/5937 -* https://github.com/Theano/Theano/pull/5852 -* https://github.com/Theano/Theano/pull/5922 -* https://github.com/Theano/Theano/pull/5921 -* https://github.com/Theano/Theano/pull/5902 -* https://github.com/Theano/Theano/pull/5903 -* https://github.com/Theano/Theano/pull/5909 -* https://github.com/Theano/Theano/pull/5758 -* https://github.com/Theano/Theano/pull/5778 -* https://github.com/Theano/Theano/pull/5900 -* https://github.com/Theano/Theano/pull/5895 -* https://github.com/Theano/Theano/pull/5883 -* https://github.com/Theano/Theano/pull/5896 -* https://github.com/Theano/Theano/pull/5888 -* https://github.com/Theano/Theano/pull/5886 -* https://github.com/Theano/Theano/pull/5885 -* https://github.com/Theano/Theano/pull/5873 -* https://github.com/Theano/Theano/pull/5877 -* https://github.com/Theano/Theano/pull/5878 -* https://github.com/Theano/Theano/pull/5872 -* https://github.com/Theano/Theano/pull/5870 -* https://github.com/Theano/Theano/pull/5854 -* https://github.com/Theano/Theano/pull/5865 -* https://github.com/Theano/Theano/pull/5853 -* https://github.com/Theano/Theano/pull/5850 -* https://github.com/Theano/Theano/pull/5538 -* https://github.com/Theano/Theano/pull/5863 -* https://github.com/Theano/Theano/pull/5799 -* https://github.com/Theano/Theano/pull/5859 -* https://github.com/Theano/Theano/pull/5755 -* https://github.com/Theano/Theano/pull/5860 -* https://github.com/Theano/Theano/pull/5716 -* 
https://github.com/Theano/Theano/pull/5842 -* https://github.com/Theano/Theano/pull/5821 -* https://github.com/Theano/Theano/pull/5789 -* https://github.com/Theano/Theano/pull/5847 -* https://github.com/Theano/Theano/pull/5735 -* https://github.com/Theano/Theano/pull/5710 -* https://github.com/Theano/Theano/pull/5843 -* https://github.com/Theano/Theano/pull/5832 -* https://github.com/Theano/Theano/pull/5814 -* https://github.com/Theano/Theano/pull/5835 -* https://github.com/Theano/Theano/pull/5834 -* https://github.com/Theano/Theano/pull/5829 -* https://github.com/Theano/Theano/pull/5785 -* https://github.com/Theano/Theano/pull/5824 -* https://github.com/Theano/Theano/pull/5820 -* https://github.com/Theano/Theano/pull/5808 -* https://github.com/Theano/Theano/pull/5815 -* https://github.com/Theano/Theano/pull/5819 -* https://github.com/Theano/Theano/pull/5612 -* https://github.com/Theano/Theano/pull/5802 -* https://github.com/Theano/Theano/pull/5796 -* https://github.com/Theano/Theano/pull/5806 -* https://github.com/Theano/Theano/pull/5782 -* https://github.com/Theano/Theano/pull/5787 -* https://github.com/Theano/Theano/pull/5774 -* https://github.com/Theano/Theano/pull/5751 -* https://github.com/Theano/Theano/pull/5779 -* https://github.com/Theano/Theano/pull/5763 -* https://github.com/Theano/Theano/pull/5746 -* https://github.com/Theano/Theano/pull/5579 -* https://github.com/Theano/Theano/pull/5772 -* https://github.com/Theano/Theano/pull/5756 -* https://github.com/Theano/Theano/pull/5769 -* https://github.com/Theano/Theano/pull/5433 -* https://github.com/Theano/Theano/pull/5760 -* https://github.com/Theano/Theano/pull/5470 -* https://github.com/Theano/Theano/pull/5759 -* https://github.com/Theano/Theano/pull/5739 -* https://github.com/Theano/Theano/pull/5752 -* https://github.com/Theano/Theano/pull/5548 -* https://github.com/Theano/Theano/pull/5749 -* https://github.com/Theano/Theano/pull/5665 -* https://github.com/Theano/Theano/pull/5562 -* https://github.com/Theano/Theano/pull/5686 -* https://github.com/Theano/Theano/pull/5718 -* https://github.com/Theano/Theano/pull/5698 -* https://github.com/Theano/Theano/pull/5720 -* https://github.com/Theano/Theano/pull/5717 -* https://github.com/Theano/Theano/pull/5715 -* https://github.com/Theano/Theano/pull/5502 -* https://github.com/Theano/Theano/pull/5533 -* https://github.com/Theano/Theano/pull/5660 -* https://github.com/Theano/Theano/pull/5682 -* https://github.com/Theano/Theano/pull/5704 -* https://github.com/Theano/Theano/pull/5687 -* https://github.com/Theano/Theano/pull/5455 -* https://github.com/Theano/Theano/pull/5667 -* https://github.com/Theano/Theano/pull/5554 -* https://github.com/Theano/Theano/pull/5486 -* https://github.com/Theano/Theano/pull/5567 -* https://github.com/Theano/Theano/pull/5615 -* https://github.com/Theano/Theano/pull/5672 -* https://github.com/Theano/Theano/pull/5524 - -Theano Development version -========================== - -NEWS.txt: - diff --git a/README.rst b/README.rst index f7f368032d..c4d7f0acd2 100644 --- a/README.rst +++ b/README.rst @@ -7,10 +7,6 @@ arrays. It can use GPUs and perform efficient symbolic differentiation. This is a fork of the `original Theano library `__ that is being maintained by the `PyMC team `__. -.. warning:: - The name of this repository/project may change in the near future. 
- - Features ======== @@ -36,21 +32,21 @@ The latest release of |Project Name| can be installed from PyPI using ``pip``: :: - pip install Theano-PyMC + pip install aesara Or via conda-forge: :: - conda install -c conda-forge theano-pymc + conda install -c conda-forge aesara The current development branch of |Project Name| can be installed from GitHub, also using ``pip``: :: - pip install git+https://github.com/pymc-devs/Theano-PyMC + pip install git+https://github.com/pymc-devs/aesara For platform-specific installation information see the legacy documentation `here `__. @@ -62,8 +58,8 @@ Support The PyMC group operates under the NumFOCUS umbrella. If you want to support us financially, you can donate `here `__. -.. |Project Name| replace:: Theano-PyMC -.. |Tests Status| image:: https://github.com/pymc-devs/Theano-PyMC/workflows/Tests/badge.svg - :target: https://github.com/pymc-devs/Theano/actions?query=workflow%3ATests -.. |Coverage| image:: https://coveralls.io/repos/github/pymc-devs/Theano-PyMC/badge.svg?branch=master - :target: https://coveralls.io/github/pymc-devs/Theano-PyMC?branch=master +.. |Project Name| replace:: Aesara +.. |Tests Status| image:: https://github.com/pymc-devs/aesara/workflows/Tests/badge.svg + :target: https://github.com/pymc-devs/aesara/actions?query=workflow%3ATests +.. |Coverage| image:: https://coveralls.io/repos/github/pymc-devs/aesara/badge.svg?branch=master + :target: https://coveralls.io/github/pymc-devs/aesara?branch=master diff --git a/theano/__init__.py b/aesara/__init__.py similarity index 60% rename from theano/__init__.py rename to aesara/__init__.py index e8270f9c82..0e3ee5d48c 100644 --- a/theano/__init__.py +++ b/aesara/__init__.py @@ -1,13 +1,13 @@ """ -Theano is an optimizing compiler in Python, built to evaluate +Aesara is an optimizing compiler in Python, built to evaluate complicated expressions (especially matrix-valued ones) as quickly as -possible. Theano compiles expression graphs (see :doc:`graph` ) that +possible. Aesara compiles expression graphs (see :doc:`graph` ) that are built by Python code. The expressions in these graphs are called `Apply` nodes and the variables in these graphs are called `Variable` nodes. You compile a graph by calling `function`, which takes a graph, and -returns a callable object. One of theano's most important features is +returns a callable object. One of aesara's most important features is that `function` can transform your graph before compiling it. It can replace simple expressions with faster or more numerically stable implementations. @@ -16,58 +16,54 @@ - Op List (:doc:`oplist`) -The markup language used in the docstrings is ReStructured Text, -which may be rendered with Sphinx. A rendered version is -maintained at http://www.deeplearning.net/software/theano/library/ - """ __docformat__ = "restructuredtext en" # Set a default logger. It is important to do this before importing some other -# theano code, since this code may want to log some messages. +# aesara code, since this code may want to log some messages. 
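The module docstring above describes building an expression graph and compiling it with ``function``; a minimal sketch of that workflow, using only the names this module re-exports (``aesara.function``, ``aesara.grad``, ``aesara.tensor``):

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    x = tt.dscalar("x")              # symbolic input
    y = x ** 2 + 1                   # symbolic expression graph
    f = aesara.function([x], y)      # compile the graph into a callable
    assert f(3.0) == 10.0

    # Gradients are built symbolically as well, then compiled the same way.
    df = aesara.function([x], aesara.grad(y, x))
    assert df(3.0) == 6.0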
import logging import os import sys -theano_logger = logging.getLogger("theano") +aesara_logger = logging.getLogger("aesara") logging_default_handler = logging.StreamHandler() logging_default_formatter = logging.Formatter( fmt="%(levelname)s (%(name)s): %(message)s" ) logging_default_handler.setFormatter(logging_default_formatter) -theano_logger.setLevel(logging.WARNING) +aesara_logger.setLevel(logging.WARNING) -if not theano_logger.hasHandlers(): - theano_logger.addHandler(logging_default_handler) +if not aesara_logger.hasHandlers(): + aesara_logger.addHandler(logging_default_handler) -# Disable default log handler added to theano_logger when the module +# Disable default log handler added to aesara_logger when the module # is imported. -def disable_log_handler(logger=theano_logger, handler=logging_default_handler): +def disable_log_handler(logger=aesara_logger, handler=logging_default_handler): if logger.hasHandlers(): logger.removeHandler(handler) # Version information. -from theano.version import version as __version__ +from aesara.version import version as __version__ -# Raise a meaningful warning/error if the theano directory is in the Python +# Raise a meaningful warning/error if the aesara directory is in the Python # path. rpath = os.path.realpath(__path__[0]) for p in sys.path: if os.path.realpath(p) != rpath: continue - raise RuntimeError("You have the theano directory in your Python path.") + raise RuntimeError("You have the aesara directory in your Python path.") -from theano.configdefaults import config -from theano.utils import deprecated +from aesara.configdefaults import config +from aesara.utils import deprecated -change_flags = deprecated("Use theano.config.change_flags instead!")( +change_flags = deprecated("Use aesara.config.change_flags instead!")( config.change_flags ) @@ -80,8 +76,8 @@ def disable_log_handler(logger=theano_logger, handler=logging_default_handler): # very rarely. __api_version__ = 1 -from theano import scalar, tensor -from theano.compile import ( +from aesara import scalar, tensor +from aesara.compile import ( In, Mode, Out, @@ -92,11 +88,11 @@ def disable_log_handler(logger=theano_logger, handler=logging_default_handler): predefined_optimizers, shared, ) -from theano.compile.function import function, function_dump -from theano.compile.function.types import FunctionMaker -from theano.gradient import Lop, Rop, grad, subgraph_grad -from theano.printing import pp, pprint -from theano.updates import OrderedUpdates +from aesara.compile.function import function, function_dump +from aesara.compile.function.types import FunctionMaker +from aesara.gradient import Lop, Rop, grad, subgraph_grad +from aesara.printing import pp, pprint +from aesara.updates import OrderedUpdates if ( @@ -106,7 +102,7 @@ def disable_log_handler(logger=theano_logger, handler=logging_default_handler): or config.init_gpu_device.startswith("opencl") or config.contexts != "" ): - import theano.gpuarray + import aesara.gpuarray def get_scalar_constant_value(v): @@ -115,12 +111,12 @@ def get_scalar_constant_value(v): If `v` is the output of dim-shuffles, fills, allocs, rebroadcasts, cast this function digs through them. - If ``theano.sparse`` is also there, we will look over CSM `Op`. + If ``aesara.sparse`` is also there, we will look over CSM `Op`. If `v` is not some view of constant data, then raise a `NotScalarConstantError`. """ - # Is it necessary to test for presence of theano.sparse at runtime? + # Is it necessary to test for presence of aesara.sparse at runtime? 
sparse = globals().get("sparse") if sparse and isinstance(v.type, sparse.SparseType): if v.owner is not None and isinstance(v.owner.op, sparse.CSM): @@ -129,13 +125,13 @@ def get_scalar_constant_value(v): return tensor.get_scalar_constant_value(v) -import theano.tensor.random.var -from theano.graph.basic import clone_replace -from theano.scan import checkpoints -from theano.scan.basic import scan -from theano.scan.views import foldl, foldr, map, reduce +import aesara.tensor.random.var +from aesara.graph.basic import clone_replace +from aesara.scan import checkpoints +from aesara.scan.basic import scan +from aesara.scan.views import foldl, foldr, map, reduce # Some config variables are registered by submodules. Only after all those imports -# were executed, we can warn about remaining flags provided by the user through THEANO_FLAGS. +# were executed, we can warn about remaining flags provided by the user through AESARA_FLAGS. config.warn_unused_flags() diff --git a/theano/_version.py b/aesara/_version.py similarity index 99% rename from theano/_version.py rename to aesara/_version.py index b2a9eb5823..61b26e4f16 100644 --- a/theano/_version.py +++ b/aesara/_version.py @@ -42,7 +42,7 @@ def get_config(): cfg.style = "pep440" cfg.tag_prefix = "rel-" cfg.parentdir_prefix = "None" - cfg.versionfile_source = "theano/_version.py" + cfg.versionfile_source = "aesara/_version.py" cfg.verbose = False return cfg diff --git a/theano/assert_op.py b/aesara/assert_op.py similarity index 86% rename from theano/assert_op.py rename to aesara/assert_op.py index d42678efd5..5b10a267dc 100644 --- a/theano/assert_op.py +++ b/aesara/assert_op.py @@ -1,8 +1,8 @@ import numpy as np -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable -from theano.graph.op import COp +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import COp class Assert(COp): @@ -23,11 +23,11 @@ class Assert(COp): Examples -------- - >>> import theano - >>> T = theano.tensor + >>> import aesara + >>> T = aesara.tensor >>> x = T.vector('x') >>> assert_op = T.opt.Assert() - >>> func = theano.function([x], assert_op(x, x.size<2)) + >>> func = aesara.function([x], assert_op(x, x.size<2)) """ @@ -37,16 +37,16 @@ class Assert(COp): check_input = False - def __init__(self, msg="Theano Assert failed!"): + def __init__(self, msg="Aesara Assert failed!"): self.msg = msg def __setstate__(self, attrs): self.__dict__.update(attrs) if not hasattr(self, "msg"): - self.msg = "Theano Assert failed!" + self.msg = "Aesara Assert failed!" 
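For the flag handling at the end of the ``aesara/__init__.py`` hunk above, a minimal sketch of how flags reach ``aesara.config``, assuming the flag syntax is unchanged apart from the ``AESARA_FLAGS`` name:

.. code-block:: python

    # From a shell, flags are read once at import time, e.g.
    #   AESARA_FLAGS='floatX=float32,optimizer=fast_compile' python my_script.py
    # (the script name is just a placeholder).
    import aesara

    print(aesara.config.floatX)

    # Inside a process, `config.change_flags` (which the deprecated top-level
    # `change_flags` above now wraps) scopes a temporary flag value:
    with aesara.config.change_flags(floatX="float64"):
        print(aesara.config.floatX)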
def make_node(self, value, *conds): - from theano.tensor import as_tensor_variable + from aesara.tensor import as_tensor_variable if not isinstance(value, Variable): value = as_tensor_variable(value) diff --git a/theano/breakpoint.py b/aesara/breakpoint.py similarity index 88% rename from theano/breakpoint.py rename to aesara/breakpoint.py index 7f21a8c5d7..95ce66ac31 100644 --- a/theano/breakpoint.py +++ b/aesara/breakpoint.py @@ -1,15 +1,15 @@ import numpy as np -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable -from theano.graph.op import Op -from theano.tensor.basic import as_tensor_variable +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import Op +from aesara.tensor.basic import as_tensor_variable class PdbBreakpoint(Op): """ This is an identity-like op with the side effect of enforcing a - conditional breakpoint, inside a theano function, based on a symbolic + conditional breakpoint, inside an Aesara function, based on a symbolic scalar condition. It automatically detects available debuggers and uses the first available in the following order: `pudb`, `ipdb`, or `pdb`. @@ -18,11 +18,11 @@ class PdbBreakpoint(Op): breakpoint is activated. :note: WARNING. At least one of the outputs of the op must be used - otherwise the op will be removed from the Theano graph + otherwise the op will be removed from the Aesara graph due to its outputs being unused - :note: WARNING. Employing the function inside a theano graph can prevent - Theano from applying certain optimizations to improve + :note: WARNING. Employing the function inside an Aesara graph can prevent + Aesara from applying certain optimizations to improve performance, reduce memory consumption and/or reduce numerical instability. @@ -35,9 +35,9 @@ class PdbBreakpoint(Op): .. 
code-block:: python - import theano - import theano.tensor as tt - from theano.breakpoint import PdbBreakpoint + import aesara + import aesara.tensor as tt + from aesara.breakpoint import PdbBreakpoint input = tt.fvector() target = tt.fvector() @@ -53,8 +53,8 @@ class PdbBreakpoint(Op): mse, monitored_input, monitored_target = breakpointOp(condition, mse, input, target) - # Compile the theano function - fct = theano.function([input, target], mse) + # Compile the aesara function + fct = aesara.function([input, target], mse) # Use the function print fct([10, 0], [10, 5]) # Will NOT activate the breakpoint @@ -70,7 +70,7 @@ def __init__(self, name): def make_node(self, condition, *monitored_vars): - # Ensure that condition is a theano tensor + # Ensure that condition is an Aesara tensor if not isinstance(condition, Variable): condition = as_tensor_variable(condition) diff --git a/theano/compile/__init__.py b/aesara/compile/__init__.py similarity index 71% rename from theano/compile/__init__.py rename to aesara/compile/__init__.py index 9765761ae5..e08b2ddb75 100644 --- a/theano/compile/__init__.py +++ b/aesara/compile/__init__.py @@ -1,5 +1,5 @@ -from theano.compile.function.pfunc import Param, pfunc, rebuild_collect_shared -from theano.compile.function.types import ( +from aesara.compile.function.pfunc import Param, pfunc, rebuild_collect_shared +from aesara.compile.function.types import ( AliasedMemoryError, Function, FunctionMaker, @@ -17,8 +17,8 @@ std_fgraph, view_tree_set, ) -from theano.compile.io import In, Out, SymbolicInput, SymbolicOutput -from theano.compile.mode import ( +from aesara.compile.io import In, Out, SymbolicInput, SymbolicOutput +from aesara.compile.mode import ( FAST_COMPILE, FAST_RUN, JAX, @@ -47,8 +47,8 @@ register_mode, register_optimizer, ) -from theano.compile.monitormode import MonitorMode -from theano.compile.ops import ( +from aesara.compile.monitormode import MonitorMode +from aesara.compile.ops import ( DeepCopyOp, FromFunctionOp, ViewOp, @@ -58,5 +58,5 @@ register_view_op_c_code, view_op, ) -from theano.compile.profiling import ProfileStats, ScanProfileStats -from theano.compile.sharedvalue import SharedVariable, shared, shared_constructor +from aesara.compile.profiling import ProfileStats, ScanProfileStats +from aesara.compile.sharedvalue import SharedVariable, shared, shared_constructor diff --git a/theano/compile/builders.py b/aesara/compile/builders.py similarity index 93% rename from theano/compile/builders.py rename to aesara/compile/builders.py index 2eca0cce33..fedf8fea46 100644 --- a/theano/compile/builders.py +++ b/aesara/compile/builders.py @@ -3,25 +3,25 @@ from collections import OrderedDict from functools import partial, reduce -import theano -from theano import tensor as tt -from theano.compile.function.pfunc import rebuild_collect_shared -from theano.compile.function.types import orig_function -from theano.compile.mode import optdb -from theano.compile.sharedvalue import SharedVariable -from theano.configdefaults import config -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable, graph_inputs, io_connection_pattern -from theano.graph.fg import FunctionGraph -from theano.graph.null_type import NullType -from theano.graph.op import Op, ops_with_inner_function -from theano.graph.opt import in2out, local_optimizer -from theano.tensor.basic_opt import ShapeFeature +import aesara +from aesara import tensor as tt +from aesara.compile.function.pfunc import rebuild_collect_shared +from 
aesara.compile.function.types import orig_function +from aesara.compile.mode import optdb +from aesara.compile.sharedvalue import SharedVariable +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable, graph_inputs, io_connection_pattern +from aesara.graph.fg import FunctionGraph +from aesara.graph.null_type import NullType +from aesara.graph.op import Op, ops_with_inner_function +from aesara.graph.opt import in2out, local_optimizer +from aesara.tensor.basic_opt import ShapeFeature def infer_shape(outs, inputs, input_shapes): """ - Compute the shape of the outputs given the shape of the inputs of a theano + Compute the shape of the outputs given the shape of the inputs of an Aesara graph. We do it this way to avoid compiling the inner function just to get @@ -75,7 +75,7 @@ def local_traverse(out): class OpFromGraph(Op): r""" This creates an ``Op`` from inputs and outputs lists of variables. - The signature is similar to :func:`theano.function ` + The signature is similar to :func:`aesara.function ` and the resulting ``Op``'s perform will do the same operation as:: orig_function(inputs, outputs, **kwargs) @@ -85,9 +85,9 @@ class OpFromGraph(Op): Parameters ---------- - inputs: list of :class:`Variable ` + inputs: list of :class:`Variable ` - outputs: list of :class:`Variable ` + outputs: list of :class:`Variable ` inline: bool, optional Defaults to ``False`` @@ -109,15 +109,15 @@ class OpFromGraph(Op): arguments as one would specify in grad() method. callable : Should take two args: ``inputs`` and ``output_grads``. - Each argument is expected to be a list of :class:`Variable `. - Must return list of :class:`Variable `. + Each argument is expected to be a list of :class:`Variable `. + Must return list of :class:`Variable `. Variable : ``NullType() instance`` : Treat as non-differentiable ``DisconnectedType() instance`` : Treat as disconnected gradient, numerically gives zero list: Each OpFromGraph/callable must return a single - :class:`Variable `. Each list element corresponds to gradient of + :class:`Variable `. Each list element corresponds to gradient of a specific input, length of list must be equal to number of inputs. lop_overrides : single or list of {'default', OpFromGraph, callable, Variable with special type}, optional @@ -131,15 +131,15 @@ class OpFromGraph(Op): arguments as one would specify in grad() method. callable : Should take three args: ``inputs``, ``outputs`` and ``output_grads``. - Each argument is expected to be a list of :class:`Variable `. - Must return list of :class:`Variable `. + Each argument is expected to be a list of :class:`Variable `. + Must return list of :class:`Variable `. Variable : ``NullType() instance`` : Treat as non-differentiable ``DisconnectedType() instance`` : Treat as disconnected gradient, numerically gives zero list: Each OpFromGraph/callable must return a single - :class:`Variable `. Each list element corresponds to gradient of + :class:`Variable `. Each list element corresponds to gradient of a specific input, length of list must be equal to number of inputs. rop_overrides : single or list of {'default', OpFromGraph, callable, Variable with special type}, optional @@ -152,15 +152,15 @@ class OpFromGraph(Op): arguments as one would specify in R_op() method. callable : Should take two args: ``inputs`` and ``eval_points``. - Each argument is expected to be a list of :class:`Variable `. - Must return list of :class:`Variable `. 
+ Each argument is expected to be a list of :class:`Variable `. + Must return list of :class:`Variable `. Variable : ``NullType() instance`` : Treat as non-differentiable ``DisconnectedType() instance`` : Treat as zero since DisconnectedType is not yet supported in R_op list: Each OpFromGraph/callable must return a single - :class:`Variable `. Each list element corresponds + :class:`Variable `. Each list element corresponds to a specific output of R_op, length of list must be equal to number of outputs. connection_pattern : list of list @@ -173,7 +173,7 @@ class OpFromGraph(Op): \*\*kwargs : optional Check - :func:`orig_function ` + :func:`orig_function ` for more arguments, only works when not inline. @@ -215,12 +215,12 @@ class OpFromGraph(Op): .. code-block:: python - from theano import function, tensor as tt - from theano.compile.builders import OpFromGraph + from aesara import function, tensor as tt + from aesara.compile.builders import OpFromGraph x, y, z = tt.scalars('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e]) - # op behaves like a normal theano op + # op behaves like a normal aesara op e2 = op(x, y, z) + op(z, y, x) fn = function([x, y, z], [e2]) @@ -229,15 +229,15 @@ class OpFromGraph(Op): .. code-block:: python import numpy as np - import theano - from theano import config, function, tensor as tt - from theano.compile.builders import OpFromGraph + import aesara + from aesara import config, function, tensor as tt + from aesara.compile.builders import OpFromGraph x, y, z = tt.scalars('xyz') - s = theano.shared(np.random.rand(2, 2).astype(config.floatX)) + s = aesara.shared(np.random.rand(2, 2).astype(config.floatX)) e = x + y * z + s op = OpFromGraph([x, y, z], [e]) - # op behaves like a normal theano op + # op behaves like a normal aesara op e2 = op(x, y, z) + op(z, y, x) fn = function([x, y, z], [e2]) @@ -245,8 +245,8 @@ class OpFromGraph(Op): .. code-block:: python - from theano import function, tensor as tt, grad - from theano.compile.builders import OpFromGraph + from aesara import function, tensor as tt, grad + from aesara.compile.builders import OpFromGraph x, y, z = tt.scalars('xyz') e = x + y * z @@ -441,7 +441,7 @@ def lop_op(inps, grads): output_grads = [out_t() for out_t in self.output_types] fn_grad = partial( - theano.gradient.grad, + aesara.gradient.grad, cost=None, disconnected_inputs="ignore", return_disconnected="Disconnected", @@ -559,7 +559,7 @@ def _recompute_rop_op(self): return eval_points = [inp_t() for inp_t in self.input_types] - fn_rop = partial(theano.gradient.Rop, wrt=local_inputs, eval_points=eval_points) + fn_rop = partial(aesara.gradient.Rop, wrt=local_inputs, eval_points=eval_points) TYPE_ERR_MSG = ( "R_op overrides should be (single or list of)" "OpFromGraph | 'default' | None | 0 | callable, got %s" @@ -781,12 +781,12 @@ def infer_shape(self, fgraph, node, shapes): # Clone the output shape so that shape are computed from outer inputs. # Note: # Here we can do it more simply like: - # ret = [theano.clone_replace(shp, replace=repl) for shp in out_shp] + # ret = [aesara.clone_replace(shp, replace=repl) for shp in out_shp] # But doing it multiple time could duplicate common subgraph between - # each shape call. Theano optimizer will clean this up later, but this + # each shape call. Aesara optimizer will clean this up later, but this # will ask extra work to the optimizer. 
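Since ``aesara.clone_replace`` does the heavy lifting in the shape-inference comment above, a standalone sketch of what that helper does, using the top-level alias imported in ``aesara/__init__.py``:

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    x = tt.dvector("x")
    y = tt.dvector("y")
    expr = (x ** 2).sum()

    # Build a new graph in which every use of `x` is replaced by `y`,
    # without mutating the original expression.
    expr_on_y = aesara.clone_replace(expr, replace={x: y})
    f = aesara.function([y], expr_on_y)
    print(f([1.0, 2.0]))   # 5.0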
repl = dict(zip(self.local_inputs, node.inputs)) - cloned = theano.clone_replace(reduce(tuple.__add__, out_shp), replace=repl) + cloned = aesara.clone_replace(reduce(tuple.__add__, out_shp), replace=repl) ret = [] used = 0 for i in range(len(out_shp)): @@ -824,7 +824,7 @@ def inline_ofg_expansion(fgraph, node): return False if not op.is_inline: return False - return theano.clone_replace( + return aesara.clone_replace( op.local_outputs, {u: v for u, v in zip(node.op.local_inputs, node.inputs)} ) @@ -839,6 +839,6 @@ def inline_ofg_expansion(fgraph, node): "fast_run", ) -# Since OpFromGraph contains a Theano compiled function, +# Since OpFromGraph contains an Aesara compiled function, # we should let DebugMode know about it ops_with_inner_function[OpFromGraph] = "fn" diff --git a/theano/compile/compiledir.py b/aesara/compile/compiledir.py similarity index 92% rename from theano/compile/compiledir.py rename to aesara/compile/compiledir.py index ffe541b288..078225e7a4 100644 --- a/theano/compile/compiledir.py +++ b/aesara/compile/compiledir.py @@ -1,6 +1,6 @@ """ This module contains housekeeping functions for cleaning/purging the "compiledir". -It is used by the "theano-cache" CLI tool, located in the /bin folder of the repository. +It is used by the "aesara-cache" CLI tool, located in the /bin folder of the repository. """ import logging import os @@ -9,12 +9,12 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.utils import flatten +import aesara +from aesara.configdefaults import config +from aesara.utils import flatten -_logger = logging.getLogger("theano.compile.compiledir") +_logger = logging.getLogger("aesara.compile.compiledir") def cleanup(): @@ -55,7 +55,7 @@ def cleanup(): elif obj.startswith("c_compiler_str="): have_c_compiler = True elif isinstance( - obj, (theano.graph.op.Op, theano.graph.type.CType) + obj, (aesara.graph.op.Op, aesara.graph.type.CType) ) and hasattr(obj, "c_code_cache_version"): v = obj.c_code_cache_version() if v not in [(), None] and v not in key[0]: @@ -106,7 +106,7 @@ def print_title(title, overline="", underline=""): def print_compiledir_content(): """ - print list of %d compiled individual ops in the "theano.config.compiledir" + print list of %d compiled individual ops in the "aesara.config.compiledir" """ max_key_file_size = 1 * 1024 * 1024 # 1M @@ -129,7 +129,7 @@ def print_compiledir_content(): { x for x in flatten(keydata.keys) - if isinstance(x, theano.graph.op.Op) + if isinstance(x, aesara.graph.op.Op) } ) # Whatever the case, we count compilations for OP classes. 
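These helpers back the renamed ``aesara-cache`` tool and can also be called directly; a sketch of inspecting and purging the compilation cache with the functions defined in this module:

.. code-block:: python

    import aesara
    from aesara.compile.compiledir import (
        basecompiledir_ls,
        compiledir_purge,
        print_compiledir_content,
    )

    print(aesara.config.compiledir)   # where compiled modules are cached
    print_compiledir_content()        # summarize the cached ops per sub-directory
    basecompiledir_ls()               # list entries under base_compiledir
    # compiledir_purge()              # delete the current cache (left commented out)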
@@ -143,7 +143,7 @@ def print_compiledir_content(): { x for x in flatten(keydata.keys) - if isinstance(x, theano.graph.type.CType) + if isinstance(x, aesara.graph.type.CType) } ) compile_start = compile_end = float("nan") @@ -178,12 +178,12 @@ def print_compiledir_content(): except AttributeError: _logger.error(f"Could not read key file '{filename}'.") - print_title(f"Theano cache: {compiledir}", overline="=", underline="=") + print_title(f"Aesara cache: {compiledir}", overline="=", underline="=") print() print_title(f"List of {len(table)} compiled individual ops", underline="+") print_title( - "sub dir/compiletime/Op/set of different associated Theano types", underline="-" + "sub dir/compiletime/Op/set of different associated Aesara types", underline="-" ) table = sorted(table, key=lambda t: str(t[1])) for dir, op, types, compile_time in table: @@ -194,7 +194,7 @@ def print_compiledir_content(): f"List of {len(table_multiple_ops)} compiled sets of ops", underline="+" ) print_title( - "sub dir/compiletime/Set of ops/set of different associated Theano types", + "sub dir/compiletime/Set of ops/set of different associated Aesara types", underline="-", ) table_multiple_ops = sorted(table_multiple_ops, key=lambda t: (t[1], t[2])) @@ -239,7 +239,7 @@ def print_compiledir_content(): print() print( f"Skipped {int(zeros_op)} files that contained 0 op " - "(are they always theano.scalar ops?)" + "(are they always aesara.scalar ops?)" ) @@ -249,7 +249,7 @@ def compiledir_purge(): def basecompiledir_ls(): """ - Print list of files in the "theano.config.base_compiledir" + Print list of files in the "aesara.config.base_compiledir" """ subdirs = [] others = [] diff --git a/theano/compile/compilelock.py b/aesara/compile/compilelock.py similarity index 98% rename from theano/compile/compilelock.py rename to aesara/compile/compilelock.py index 719db9a597..1c2e7ba7d4 100644 --- a/theano/compile/compilelock.py +++ b/aesara/compile/compilelock.py @@ -9,7 +9,7 @@ import filelock -from theano.configdefaults import config +from aesara.configdefaults import config __all__ = [ diff --git a/theano/compile/debugmode.py b/aesara/compile/debugmode.py similarity index 98% rename from theano/compile/debugmode.py rename to aesara/compile/debugmode.py index 984990a343..3c39279cda 100644 --- a/theano/compile/debugmode.py +++ b/aesara/compile/debugmode.py @@ -1,5 +1,5 @@ """ -Provides `DebugMode`, an evaluation mode for debugging theano internals. +Provides `DebugMode`, an evaluation mode for debugging aesara internals. TODO: add support for IfElse Op, LazyLinker, etc. 
@@ -18,35 +18,35 @@ import numpy as np -import theano -from theano.compile.function.types import ( +import aesara +from aesara.compile.function.types import ( Function, FunctionMaker, infer_reuse_pattern, std_fgraph, ) -from theano.compile.mode import Mode, register_mode -from theano.compile.ops import OutputGuard, _output_guard -from theano.configdefaults import config -from theano.graph.basic import Variable, graph_inputs, io_toposort -from theano.graph.destroyhandler import DestroyHandler -from theano.graph.fg import InconsistencyError -from theano.graph.op import COp, Op, ops_with_inner_function -from theano.graph.toolbox import BadOptimization -from theano.graph.utils import MethodNotDefined -from theano.link.basic import Container, LocalLinker -from theano.link.utils import map_storage, raise_with_op -from theano.utils import NoDuplicateOptWarningFilter, difference, get_unbound_function +from aesara.compile.mode import Mode, register_mode +from aesara.compile.ops import OutputGuard, _output_guard +from aesara.configdefaults import config +from aesara.graph.basic import Variable, graph_inputs, io_toposort +from aesara.graph.destroyhandler import DestroyHandler +from aesara.graph.fg import InconsistencyError +from aesara.graph.op import COp, Op, ops_with_inner_function +from aesara.graph.toolbox import BadOptimization +from aesara.graph.utils import MethodNotDefined +from aesara.link.basic import Container, LocalLinker +from aesara.link.utils import map_storage, raise_with_op +from aesara.utils import NoDuplicateOptWarningFilter, difference, get_unbound_function __docformat__ = "restructuredtext en" -_logger: Logger = logging.getLogger("theano.compile.debugmode") +_logger: Logger = logging.getLogger("aesara.compile.debugmode") _logger.addFilter(NoDuplicateOptWarningFilter()) class DebugModeError(Exception): """ - Generic Exception raised to indicate an internal theano problem. + Generic Exception raised to indicate an internal aesara problem. """ @@ -268,9 +268,7 @@ class StochasticOrder(DebugModeError): The most common cause is that an Optimization iterates over some objects in a memory-address-dependent order (such as id() or - object.hash()). If you see this error and you think it is related - to optimizations within Theano, email theano-dev with the message - attached to this exception. + object.hash()). """ @@ -404,7 +402,7 @@ def str_diagnostic(expected, value, rtol, atol): print(ssio.getvalue(), file=sio) except Exception: pass - atol_, rtol_ = theano.tensor.math._get_atol_rtol(expected, value) + atol_, rtol_ = aesara.tensor.math._get_atol_rtol(expected, value) if rtol is not None: rtol_ = rtol if atol is not None: @@ -496,7 +494,7 @@ def debugprint( A dictionary mapping a scan ops inner function inputs to the scan op inputs (outer inputs) for printing purposes. smap - None or the storage_map when printing an Theano function. + None or the storage_map when printing an Aesara function. used_ids Internal. Used to pass information when recursing. It is a dict from obj to the id used for it. 
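This internal ``debugprint`` mirrors the public ``aesara.printing.debugprint``; a small sketch of inspecting a graph with the public printing helpers, assuming they are unchanged by the rename beyond the module names:

.. code-block:: python

    import aesara
    import aesara.tensor as tt
    from aesara.printing import debugprint

    x = tt.dscalar("x")
    dy = aesara.grad(x ** 2 + 1, x)

    print(aesara.pp(dy))   # compact pretty-printed expression
    debugprint(dy)         # indented tree, one line per apply node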
@@ -633,7 +631,7 @@ def get_id_str(obj, get_printed=True) -> str: new_prefix_child = prefix_child + " " if hasattr(i, "owner") and hasattr(i.owner, "op"): - from theano.scan.op import Scan + from aesara.scan.op import Scan if isinstance(i.owner.op, Scan): scan_ops.append(i) @@ -841,7 +839,7 @@ def _check_viewmap(fgraph, node, storage_map): view_map = getattr(node.op, "view_map", {}) destroy_map = getattr(node.op, "destroy_map", {}) - # In theory, theano's view_map only allows for 1 output to + # In theory, aesara's view_map only allows for 1 output to # alias 1 input. Checking for multiple aliases just in # case... @@ -948,7 +946,7 @@ def _lessbroken_deepcopy(a): """ # this exists because copy.deepcopy on numpy arrays is broken # This logic is also in link.py - from theano.graph.type import _cdata_type + from aesara.graph.type import _cdata_type if type(a) in (np.ndarray, np.memmap): rval = a.copy(order="K") @@ -1137,8 +1135,8 @@ def _get_preallocated_maps( """ # To avoid circular imports - from theano.gpuarray import GpuArrayType - from theano.tensor.type import TensorType + from aesara.gpuarray import GpuArrayType + from aesara.tensor.type import TensorType try: import pygpu @@ -1370,7 +1368,7 @@ def _check_preallocated_output( """ - # If node has an inner compiled Theano function with mode DebugMode, + # If node has an inner compiled Aesara function with mode DebugMode, # disable memory checks in that mode, since they were already run. try: changed_inner_mode = False @@ -1379,7 +1377,7 @@ def _check_preallocated_output( fn = getattr(node.op, fn_attr_name, None) if not fn or not hasattr(fn, "maker") or not hasattr(fn.maker, "mode"): _logger.warning( - f"Expected theano function not found in {node.op}.{fn_attr_name}" + f"Expected aesara function not found in {node.op}.{fn_attr_name}" ) else: if isinstance(fn.maker.mode, DebugMode): @@ -1758,7 +1756,7 @@ def make_all( # can't import at toplevel because of circular import TODO: # don't do this ugly hacky way of setting the # filter_checks_isfinite - from theano.tensor.type import TensorType # to set filter_check_isfinite + from aesara.tensor.type import TensorType # to set filter_check_isfinite fgraph = self.fgraph input_storage_ = input_storage @@ -2366,7 +2364,7 @@ class _Maker(FunctionMaker): # inheritance buys a few helper functions What to do if a variable in the 'inputs' list is not used in the graph. Possible values are 'raise', 'warn' and 'ignore'. output_keys - If the outputs argument for theano.function was a list, then + If the outputs argument for aesara.function was a list, then output_keys is None. If the outputs argument was a dict, then output_keys is a sorted list of the keys from that dict. @@ -2443,7 +2441,7 @@ def __init__( with config.change_flags(compute_test_value=config.compute_test_value_opt): optimizer(fgraph) - theano.compile.function.types.insert_deepcopy( + aesara.compile.function.types.insert_deepcopy( fgraph, inputs, list(chain(outputs, additional_outputs)) ) @@ -2569,7 +2567,7 @@ def __init__( class DebugMode(Mode): """ - Evaluation Mode that detects internal theano errors. + Evaluation Mode that detects internal aesara errors. 
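As a usage sketch (assuming the mode keeps its registered name), ``DebugMode`` is requested like any other compilation mode; the kinds of errors it catches are listed just below:

.. code-block:: python

    import aesara
    import aesara.tensor as tt
    from aesara.compile.debugmode import DebugMode

    x = tt.dvector("x")
    y = (x + 1).sum()

    # String form uses the registered mode name; an instance works as well.
    f = aesara.function([x], y, mode="DebugMode")
    g = aesara.function([x], y, mode=DebugMode())
    print(f([1.0, 2.0]), g([1.0, 2.0]))   # 5.0 5.0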
This mode catches several kinds of internal error: diff --git a/theano/compile/function/__init__.py b/aesara/compile/function/__init__.py similarity index 92% rename from theano/compile/function/__init__.py rename to aesara/compile/function/__init__.py index cad68fc3db..b2947034c7 100644 --- a/theano/compile/function/__init__.py +++ b/aesara/compile/function/__init__.py @@ -4,14 +4,14 @@ import warnings from collections import OrderedDict -from theano.compile.function.pfunc import pfunc -from theano.compile.function.types import orig_function +from aesara.compile.function.pfunc import pfunc +from aesara.compile.function.types import orig_function __all__ = ["types", "pfunc"] __docformat__ = "restructuredtext en" -_logger = logging.getLogger("theano.compile.function") +_logger = logging.getLogger("aesara.compile.function") def function_dump( @@ -31,19 +31,19 @@ def function_dump( extra_tag_to_remove=None, ): """ - This is helpful to make a reproducible case for problems during Theano + This is helpful to make a reproducible case for problems during Aesara compilation. Ex: - replace `theano.function(...)` by - `theano.function_dump('filename.pkl', ...)`. + replace `aesara.function(...)` by + `aesara.function_dump('filename.pkl', ...)`. If you see this, you were probably asked to use this function to - help debug a particular case during the compilation of a Theano + help debug a particular case during the compilation of an Aesara function. `function_dump` allows you to easily reproduce your compilation without generating any code. It pickles all the objects and - parameters needed to reproduce a call to `theano.function()`. This + parameters needed to reproduce a call to `aesara.function()`. This includes shared variables and their values. If you do not want that, you can choose to replace shared variables values with zeros by calling set_value(...) on them before calling `function_dump`. @@ -51,9 +51,9 @@ def function_dump( To load such a dump and do the compilation: >>> import pickle - >>> import theano + >>> import aesara >>> d = pickle.load(open("func_dump.bin", "rb")) # doctest: +SKIP - >>> f = theano.function(**d) # doctest: +SKIP + >>> f = aesara.function(**d) # doctest: +SKIP Note: The parameter `extra_tag_to_remove` is passed to the StripPickler used. @@ -77,9 +77,9 @@ def function_dump( on_unused_input=on_unused_input, ) with open(filename, "wb") as f: - import theano.misc.pkl_utils + import aesara.misc.pkl_utils - pickler = theano.misc.pkl_utils.StripPickler( + pickler = aesara.misc.pkl_utils.StripPickler( f, protocol=-1, extra_tag_to_remove=extra_tag_to_remove ) pickler.dump(d) @@ -100,7 +100,7 @@ def function( on_unused_input=None, ): """ - Return a :class:`callable object ` + Return a :class:`callable object ` that will calculate `outputs` from `inputs`. Parameters @@ -158,7 +158,7 @@ def function( Returns ------- - :class:`theano.compile.function.types.Function` instance + :class:`aesara.compile.function.types.Function` instance A callable object that will compute the outputs (given the inputs) and update the implicit function arguments according to the `updates`. @@ -174,7 +174,7 @@ def function( Internal documentation: - What happens when you call theano.function? + What happens when you call aesara.function? 1. RemoveShared: shared variables are just an abstraction to make things more convenient for the user. The shared variables are transformed into implicit inputs and implicit outputs. The @@ -187,7 +187,7 @@ def function( will detect this. 
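Complementing the load snippet in the ``function_dump`` docstring above, the dump side is the drop-in replacement it describes; a minimal sketch of the round trip:

.. code-block:: python

    import pickle

    import aesara
    import aesara.tensor as tt

    x = tt.dvector("x")
    y = (x ** 2).sum()

    # Instead of compiling, pickle everything needed to reproduce the call.
    aesara.function_dump("func_dump.bin", [x], y)

    # Later (e.g. when reporting a compilation problem), rebuild the function.
    with open("func_dump.bin", "rb") as fh:
        d = pickle.load(fh)
    fn = aesara.function(**d)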
inplace optimizations: say we have an apply node that does + on V1 and V2, with output V3. We can change the output to be - V1, to use less memory. theano must be told that this optimization is + V1, to use less memory. aesara must be told that this optimization is happening though, so that other parts of the graph are given the correct (pre + or post + ) version of V1. fgraph will raise an error if any of these types of @@ -198,8 +198,8 @@ def function( determining whether to do some optimizations. for example, a fusion operation that removes V3 is not very helpful if V3 is also needed for some other apply node. fusion operations result in a composite op that - takes a minigraph of theano scalars and uses this to do elemwise - operations on theano tensors + takes a minigraph of aesara scalars and uses this to do elemwise + operations on aesara tensors 3. Optimization How well do optimizations apply to new ops? Usually there are no optimizations for new ops. In fact, new @@ -276,7 +276,7 @@ def opt_log1p(node): last_frame = stack[idx] if last_frame[0] == source_file or last_frame[0] == compiled_file: func_frame = stack[idx - 1] - while "theano/graph" in func_frame[0] and idx > 0: + while "aesara/graph" in func_frame[0] and idx > 0: idx -= 1 # This can happen if we call var.eval() func_frame = stack[idx - 1] @@ -291,7 +291,7 @@ def opt_log1p(node): and len(updates) > 1 ): warnings.warn( - "The parameter 'updates' of theano.function()" + "The parameter 'updates' of aesara.function()" " expects an OrderedDict," " got " + str(type(updates)) + ". Using " "a standard dictionary here results in " @@ -307,7 +307,7 @@ def opt_log1p(node): givens = [] if not isinstance(inputs, (list, tuple)): raise Exception( - "Input variables of a Theano function should be " + "Input variables of an Aesara function should be " "contained in a list, even when there is a single " "input." ) diff --git a/theano/compile/function/pfunc.py b/aesara/compile/function/pfunc.py similarity index 93% rename from theano/compile/function/pfunc.py rename to aesara/compile/function/pfunc.py index ef1c281c1c..90d33c8568 100644 --- a/theano/compile/function/pfunc.py +++ b/aesara/compile/function/pfunc.py @@ -6,15 +6,15 @@ import logging import warnings -from theano.compile.function.types import UnusedInputError, orig_function -from theano.compile.io import In, Out -from theano.compile.profiling import ProfileStats -from theano.compile.sharedvalue import SharedVariable, shared -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable +from aesara.compile.function.types import UnusedInputError, orig_function +from aesara.compile.io import In, Out +from aesara.compile.profiling import ProfileStats +from aesara.compile.sharedvalue import SharedVariable, shared +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable -_logger = logging.getLogger("theano.compile.function.pfunc") +_logger = logging.getLogger("aesara.compile.function.pfunc") __docformat__ = "restructuredtext en" @@ -37,11 +37,11 @@ def rebuild_collect_shared( Parameters ---------- - outputs : list of Theano Variables (or Theano expressions) - List of Theano variables or expressions representing the outputs of the + outputs : list of Aesara Variables (or Aesara expressions) + List of Aesara variables or expressions representing the outputs of the computational graph. 
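The warning above about passing ``updates`` as an unordered ``dict`` is easiest to see with the classic shared-variable accumulator; a list of update pairs (or an ``OrderedDict``) keeps the update order explicit. A minimal sketch:

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    state = aesara.shared(0, name="state")   # implicit input and implicit output
    inc = tt.iscalar("inc")

    # Each call returns the previous state, then applies the update pair.
    accumulate = aesara.function([inc], state, updates=[(state, state + inc)])
    accumulate(2)
    accumulate(3)
    print(state.get_value())   # 5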
- inputs : list of Theano Variables (or Theano expressions) - List of Theano variables or expressions representing the inputs of the + inputs : list of Aesara Variables (or Aesara expressions) + List of Aesara variables or expressions representing the inputs of the computational graph (or None). replace : dict Dictionary describing which subgraphs should be replaced by what. @@ -223,7 +223,7 @@ def clone_inputs(i): cloned_outputs.append(Out(cloned_v, borrow=v.borrow)) else: raise TypeError( - "Outputs must be theano Variable or " + "Outputs must be aesara Variable or " "Out instances. Received " + str(v) + " of type " + str(type(v)) ) # computed_list.append(cloned_v) @@ -240,7 +240,8 @@ def clone_inputs(i): cloned_outputs = [] # TODO: get Function.__call__ to return None else: raise TypeError( - "output must be a theano Variable or Out " "instance (or list of them)", + "output must be an Aesara Variable or Out " + "instance (or list of them)", outputs, ) @@ -321,7 +322,7 @@ def pfunc( Function parameters, these are not allowed to be shared variables. outputs : list of Variables or Out instances Expressions to compute. - mode : string or `theano.compile.mode.Mode` instance + mode : string or `aesara.compile.mode.Mode` instance Compilation mode. updates : iterable over pairs (shared_variable, new_expression). List, tuple or dict. Update the values for SharedVariable inputs according to these @@ -359,7 +360,7 @@ def pfunc( Returns ------- - theano.compile.Function + aesara.compile.Function A callable object that will compute the outputs (given the inputs) and update the implicit function arguments according to the `updates`. @@ -374,7 +375,7 @@ def pfunc( """ # # This function works by cloning the graph (except for the - # inputs), and then shipping it off to theano.compile.function.function + # inputs), and then shipping it off to aesara.compile.function.function # (There it will be cloned again, unnecessarily, because it doesn't know # that we already cloned it.) # @@ -421,7 +422,7 @@ def pfunc( "lists/tuples with 2 elements" ) - # transform params into theano.compile.In objects. + # transform params into aesara.compile.In objects. inputs = [ _pfunc_param_to_in(p, allow_downcast=allow_input_downcast) for p in params ] @@ -432,7 +433,7 @@ def pfunc( if v in in_variables[(i + 1) :]: dup_v_i = in_variables.index(v, (i + 1)) raise UnusedInputError( - f"Variable {v} is used twice in inputs to theano.function, " + f"Variable {v} is used twice in inputs to aesara.function, " f"at indices {i} and {dup_v_i}. This would result in values " "provided for it being ignored. Please do not duplicate " "variables in the inputs list." @@ -453,10 +454,10 @@ def pfunc( "function. Replacing inputs is currently forbidden because it " "has no effect. One way to modify an input `x` to a function " "evaluating f(x) is to define a new input `y` and use " - "`theano.function([y], f(x), givens={x: g(y)})`. Another " - "solution consists in using `theano.clone_replace`, e.g. like this: " - "`theano.function([x], " - "theano.clone_replace(f(x), replace={x: g(x)}))`." + "`aesara.function([y], f(x), givens={x: g(y)})`. Another " + "solution consists in using `aesara.clone_replace`, e.g. like this: " + "`aesara.function([x], " + "aesara.clone_replace(f(x), replace={x: g(x)}))`." 
) # Extend the outputs with the updates on input variables so they are also diff --git a/theano/compile/function/types.py b/aesara/compile/function/types.py similarity index 97% rename from theano/compile/function/types.py rename to aesara/compile/function/types.py index 2f7a0b0032..a34fb72a1a 100644 --- a/theano/compile/function/types.py +++ b/aesara/compile/function/types.py @@ -14,13 +14,13 @@ import numpy as np -import theano -import theano.compile.profiling -from theano.compile.compilelock import lock_ctx -from theano.compile.io import In, SymbolicInput, SymbolicOutput -from theano.compile.ops import deep_copy_op, view_op -from theano.configdefaults import config -from theano.graph.basic import ( +import aesara +import aesara.compile.profiling +from aesara.compile.compilelock import lock_ctx +from aesara.compile.io import In, SymbolicInput, SymbolicOutput +from aesara.compile.ops import deep_copy_op, view_op +from aesara.configdefaults import config +from aesara.graph.basic import ( Constant, Variable, ancestors, @@ -28,16 +28,16 @@ graph_inputs, vars_between, ) -from theano.graph.destroyhandler import DestroyHandler -from theano.graph.fg import FunctionGraph, InconsistencyError -from theano.graph.op import ops_with_inner_function -from theano.graph.toolbox import PreserveVariableAttributes, is_same_graph -from theano.graph.utils import get_variable_trace_string -from theano.link.basic import Container -from theano.link.utils import raise_with_op +from aesara.graph.destroyhandler import DestroyHandler +from aesara.graph.fg import FunctionGraph, InconsistencyError +from aesara.graph.op import ops_with_inner_function +from aesara.graph.toolbox import PreserveVariableAttributes, is_same_graph +from aesara.graph.utils import get_variable_trace_string +from aesara.link.basic import Container +from aesara.link.utils import raise_with_op -_logger = logging.getLogger("theano.compile.function.types") +_logger = logging.getLogger("aesara.compile.function.types") __docformat__ = "restructuredtext en" @@ -235,8 +235,8 @@ class AliasedMemoryError(Exception): class Function: """ - Type of the functions returned by theano.function or - theano.FunctionMaker.create. + Type of the functions returned by aesara.function or + aesara.FunctionMaker.create. `Function` is the callable object that does computation. It has the storage of inputs and outputs, performs the packing and unpacking of inputs and @@ -408,7 +408,7 @@ def __init__( self._check_for_aliased_inputs = False for i in maker.inputs: # If the input is a shared variable, the memory region is - # under Theano control and so we don't need to check if it + # under Aesara control and so we don't need to check if it # is aliased as we never do that. if ( isinstance(i, In) @@ -599,12 +599,12 @@ def copy( Function. Otherwise, it will be old + " copy" profile : - as theano.function profile parameter + as aesara.function profile parameter Returns ------- - theano.Function - Copied theano.Function + aesara.Function + Copied aesara.Function """ # helper function def checkSV(sv_ori, sv_rpl): @@ -613,7 +613,7 @@ def checkSV(sv_ori, sv_rpl): 1. same type 2. same shape or dim? 
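The `Function` objects built here treat shared variables as the implicit inputs and outputs mentioned above; a small accumulator sketch of how `updates` drives them (an `OrderedDict`, or a list of pairs, avoids the plain-dict ordering warning mentioned earlier in this diff); names are illustrative:

    from collections import OrderedDict

    import aesara
    import aesara.tensor as at

    state = aesara.shared(0.0, name="state")    # implicit input/output
    inc = at.dscalar("inc")

    # Returns the current value of `state`, then applies the update in place.
    acc = aesara.function([inc], state, updates=OrderedDict([(state, state + inc)]))

    acc(1.0)
    acc(2.5)
    print(state.get_value())                    # 3.5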
""" - SharedVariable = theano.tensor.sharedvar.SharedVariable + SharedVariable = aesara.tensor.sharedvar.SharedVariable assert isinstance(sv_ori, SharedVariable), ( "Key of swap should be SharedVariable, given:", sv_ori, @@ -734,10 +734,10 @@ def checkSV(sv_ori, sv_rpl): message = name else: message = str(profile.message) + " copy" - profile = theano.compile.profiling.ProfileStats(message=message) + profile = aesara.compile.profiling.ProfileStats(message=message) # profile -> object elif type(profile) == str: - profile = theano.compile.profiling.ProfileStats(message=profile) + profile = aesara.compile.profiling.ProfileStats(message=profile) f_cpy = maker.__class__( inputs=ins, @@ -843,7 +843,7 @@ def restore_defaults(): c.provided = 0 if len(args) + len(kwargs) > len(self.input_storage): - raise TypeError("Too many parameter passed to theano function") + raise TypeError("Too many parameter passed to aesara function") # Set positional arguments i = 0 @@ -862,7 +862,7 @@ def restore_defaults(): ) except Exception as e: - function_name = "theano function" + function_name = "aesara function" argument_name = "argument" if self.name: function_name += ' with name "' + self.name + '"' @@ -1041,7 +1041,7 @@ def restore_defaults(): # dt_call = time.time() - t0 - theano.compile.profiling.total_fct_exec_time += dt_call + aesara.compile.profiling.total_fct_exec_time += dt_call self.maker.mode.call_time += dt_call if profile: profile.fct_callcount += 1 @@ -1106,7 +1106,7 @@ def get_shared(self): return [i.variable for i in self.maker.inputs if i.implicit] def sync_shared(self): - if hasattr(theano, "gpuarray") and theano.gpuarray.pygpu_activated: + if hasattr(aesara, "gpuarray") and aesara.gpuarray.pygpu_activated: import pygpu for i in self.maker.fgraph.update_mapping.values(): @@ -1311,7 +1311,7 @@ class FunctionMaker: - 'raise': raise an error - 'warn': log a warning - 'ignore': do not do anything - - None: Use the value in the Theano flags on_unused_input. + - None: Use the value in the Aesara flags on_unused_input. name : str An optional name for this function. If used, the profile mode will print the time spent in this function. @@ -1527,11 +1527,11 @@ def __init__( name=None, ): # Save the provided mode, not the instanciated mode. - # The instanciated mode don't pickle and if we unpickle a Theano + # The instanciated mode don't pickle and if we unpickle an Aesara # function and it get re-compiled, we want the current optimizer to be # used, not the optimizer when it was saved. self.mode = mode - mode = theano.compile.mode.get_mode(mode) + mode = aesara.compile.mode.get_mode(mode) # Assert old way of working isn't used if getattr(mode, "profile", None): @@ -1545,7 +1545,7 @@ def __init__( # too much execution time during testing as we compile # much more functions then the number of compile c # module. 
- theano.link.c.basic.get_module_cache().refresh() + aesara.link.c.basic.get_module_cache().refresh() # Handle the case where inputs and/or outputs is a single # Variable (not in a list) unpack_single = False @@ -1627,7 +1627,7 @@ def __init__( end_optimizer = time.time() opt_time = end_optimizer - start_optimizer - theano.compile.profiling.total_graph_opt_time += opt_time + aesara.compile.profiling.total_graph_opt_time += opt_time if profile: if optimizer_profile is None and hasattr(optimizer, "pre_profile"): @@ -1652,7 +1652,7 @@ def __init__( if not hasattr(linker, "accept"): raise ValueError( "'linker' parameter of FunctionMaker should be " - f"a Linker with an accept method or one of {list(theano.compile.mode.predefined_linkers.keys())}" + f"a Linker with an accept method or one of {list(aesara.compile.mode.predefined_linkers.keys())}" ) # the 'no_borrow' outputs are the ones for which that we can't @@ -1719,19 +1719,19 @@ def _check_unused_inputs(self, inputs, outputs, on_unused_input): ) msg = ( - "theano.function was asked to create a function computing " + "aesara.function was asked to create a function computing " "outputs given certain inputs, but the provided input " "variable at index %i is not part of the computational graph " "needed to compute the outputs: %s.\n%s" ) warn_msg = ( "To make this warning into an error, you can pass the " - "parameter on_unused_input='raise' to theano.function. " + "parameter on_unused_input='raise' to aesara.function. " "To disable it completely, use on_unused_input='ignore'." ) err_msg = ( "To make this error into a warning, you can pass the " - "parameter on_unused_input='warn' to theano.function. " + "parameter on_unused_input='warn' to aesara.function. " "To disable it completely, use on_unused_input='ignore'." ) @@ -1746,7 +1746,7 @@ def _check_unused_inputs(self, inputs, outputs, on_unused_input): else: raise ValueError( "Invalid value for keyword " - "on_unused_input of theano.function: " + "on_unused_input of aesara.function: " "'%s'.\nValid values are 'raise', " "'warn', and 'ignore'." % on_unused_input ) @@ -1830,7 +1830,7 @@ def create(self, input_storage=None, trustme=False, storage_map=None): # Get a function instance start_linker = time.time() - start_import_time = theano.link.c.cmodule.import_time + start_import_time = aesara.link.c.cmodule.import_time with config.change_flags(traceback__limit=config.traceback__compile_limit): _fn, _i, _o = self.linker.make_thunk( @@ -1840,12 +1840,12 @@ def create(self, input_storage=None, trustme=False, storage_map=None): end_linker = time.time() linker_time = end_linker - start_linker - theano.compile.profiling.total_time_linker += linker_time + aesara.compile.profiling.total_time_linker += linker_time _logger.debug(f"Linker took {linker_time:f} seconds") if self.profile: self.profile.linker_time += linker_time _fn.time_thunks = self.profile.flag_time_thunks - import_time = theano.link.c.cmodule.import_time - start_import_time + import_time = aesara.link.c.cmodule.import_time - start_import_time self.profile.import_time += import_time fn = self.function_builder( @@ -1926,7 +1926,7 @@ def orig_function( on_unused_input : {'raise', 'warn', 'ignore', None} What to do if a variable in the 'inputs' list is not used in the graph. output_keys : - If the outputs were provided to theano.function as a list, then + If the outputs were provided to aesara.function as a list, then output_keys is None. Otherwise, if outputs were provided as a dict, output_keys is the sorted list of keys from the outputs. 
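A short illustration of the `on_unused_input` options described above; with the default taken from the Aesara flag of the same name ('raise'), the call below would instead fail because `unused` is not needed to compute the output:

    import aesara
    import aesara.tensor as at

    x = at.dscalar("x")
    unused = at.dscalar("unused")

    # 'warn' logs a warning, 'ignore' silently accepts the extra input.
    f = aesara.function([x, unused], 2 * x, on_unused_input="ignore")
    print(f(3.0, 0.0))    # 6.0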
@@ -1951,7 +1951,7 @@ def orig_function( # instance if necessary: t1 = time.time() - mode = theano.compile.mode.get_mode(mode) + mode = aesara.compile.mode.get_mode(mode) inputs = list(map(convert_function_input, inputs)) if outputs is not None: diff --git a/theano/compile/io.py b/aesara/compile/io.py similarity index 98% rename from theano/compile/io.py rename to aesara/compile/io.py index ea9a14e2ac..322f8b6427 100644 --- a/theano/compile/io.py +++ b/aesara/compile/io.py @@ -6,10 +6,10 @@ import logging -from theano.link.basic import Container +from aesara.link.basic import Container -_logger = logging.getLogger("theano.compile.io") +_logger = logging.getLogger("aesara.compile.io") __docformat__ = "restructuredtext en" @@ -206,7 +206,7 @@ def __init__( ) if implicit is None: - from theano.compile.sharedvalue import SharedVariable + from aesara.compile.sharedvalue import SharedVariable implicit = isinstance(value, Container) or isinstance(value, SharedVariable) super().__init__( diff --git a/theano/compile/mode.py b/aesara/compile/mode.py similarity index 93% rename from theano/compile/mode.py rename to aesara/compile/mode.py index cff2252253..f038fb49e5 100644 --- a/theano/compile/mode.py +++ b/aesara/compile/mode.py @@ -6,36 +6,36 @@ import logging import warnings -import theano -from theano.compile.function.types import Supervisor -from theano.configdefaults import config -from theano.graph.destroyhandler import DestroyHandler -from theano.graph.opt import ( +import aesara +from aesara.compile.function.types import Supervisor +from aesara.configdefaults import config +from aesara.graph.destroyhandler import DestroyHandler +from aesara.graph.opt import ( CheckStackTraceOptimization, GlobalOptimizer, MergeOptimizer, NavigatorOptimizer, ) -from theano.graph.optdb import EquilibriumDB, LocalGroupDB, Query, SequenceDB, TopoDB -from theano.link.basic import PerformLinker -from theano.link.c.basic import CLinker, OpWiseCLinker -from theano.link.jax import JAXLinker -from theano.link.vm import VMLinker +from aesara.graph.optdb import EquilibriumDB, LocalGroupDB, Query, SequenceDB, TopoDB +from aesara.link.basic import PerformLinker +from aesara.link.c.basic import CLinker, OpWiseCLinker +from aesara.link.jax import JAXLinker +from aesara.link.vm import VMLinker -_logger = logging.getLogger("theano.compile.mode") +_logger = logging.getLogger("aesara.compile.mode") # If a string is passed as the linker argument in the constructor for # Mode, it will be used as the key to retrieve the real linker in this # dictionary predefined_linkers = { - "py": PerformLinker(), # Use allow_gc Theano flag + "py": PerformLinker(), # Use allow_gc Aesara flag "c": CLinker(), # Don't support gc. so don't check allow_gc - "c|py": OpWiseCLinker(), # Use allow_gc Theano flag + "c|py": OpWiseCLinker(), # Use allow_gc Aesara flag "c|py_nogc": OpWiseCLinker(allow_gc=False), - "vm": VMLinker(use_cloop=False), # Use allow_gc Theano flag - "cvm": VMLinker(use_cloop=True), # Use allow_gc Theano flag + "vm": VMLinker(use_cloop=False), # Use allow_gc Aesara flag + "cvm": VMLinker(use_cloop=True), # Use allow_gc Aesara flag "vm_nogc": VMLinker(allow_gc=False, use_cloop=False), "cvm_nogc": VMLinker(allow_gc=False, use_cloop=True), "jax": JAXLinker(), @@ -119,7 +119,7 @@ class AddDestroyHandler(GlobalOptimizer): 2) It tries to replace each output with an Op that purports to destroy it (but it won't I promise). If this replacement succeeds it means that - there is a bug in theano. It should not be possible to destroy outputs. 
+ there is a bug in aesara. It should not be possible to destroy outputs. """ @@ -132,7 +132,7 @@ def apply(self, fgraph): if not supervisor_added: warnings.warn( "Supervisor is not added. Please build a FunctionGraph" - "via theano.compile.function.types.std_graph()" + "via aesara.compile.function.types.std_graph()" "or add the Supervisor class manually.", stacklevel=3, ) @@ -171,10 +171,10 @@ def __init__(self, header): self.header = header def apply(self, fgraph): - import theano.printing + import aesara.printing print("PrintCurrentFunctionGraph:", self.header) - theano.printing.debugprint(fgraph.outputs) + aesara.printing.debugprint(fgraph.outputs) optdb = SequenceDB() @@ -409,7 +409,7 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs): # string as the key # Use VM_linker to allow lazy evaluation by default. FAST_COMPILE = Mode( - theano.link.vm.VMLinker(use_cloop=False, c_thunks=False), "fast_compile" + aesara.link.vm.VMLinker(use_cloop=False, c_thunks=False), "fast_compile" ) if config.cxx: FAST_RUN = Mode("cvm", "fast_run") diff --git a/theano/compile/monitormode.py b/aesara/compile/monitormode.py similarity index 90% rename from theano/compile/monitormode.py rename to aesara/compile/monitormode.py index 337def3696..5650f7c948 100644 --- a/theano/compile/monitormode.py +++ b/aesara/compile/monitormode.py @@ -1,11 +1,9 @@ -# Note: this code was initially copied from the 'pyutools' package by its -# original author, and re-licensed under Theano's license. import numpy as np -from theano.compile.mode import Mode -from theano.configdefaults import config -from theano.link.basic import WrapLinkerMany -from theano.link.c.basic import OpWiseCLinker +from aesara.compile.mode import Mode +from aesara.configdefaults import config +from aesara.link.basic import WrapLinkerMany +from aesara.link.c.basic import OpWiseCLinker class MonitorMode(Mode): @@ -95,7 +93,7 @@ def clone(self, link_kwargs=None, optimizer="", **kwargs): def detect_nan(fgraph, i, node, fn): - from theano.printing import debugprint + from aesara.printing import debugprint for output in fn.outputs: if ( diff --git a/theano/compile/nanguardmode.py b/aesara/compile/nanguardmode.py similarity index 86% rename from theano/compile/nanguardmode.py rename to aesara/compile/nanguardmode.py index 62cde91329..816d60875f 100644 --- a/theano/compile/nanguardmode.py +++ b/aesara/compile/nanguardmode.py @@ -4,26 +4,26 @@ import numpy as np -import theano -from theano.compile.mode import Mode, get_mode -from theano.configdefaults import config -from theano.tensor.math import abs_ -from theano.tensor.math import max as tt_max -from theano.tensor.math import min as tt_min -from theano.tensor.type import discrete_dtypes +import aesara +from aesara.compile.mode import Mode, get_mode +from aesara.configdefaults import config +from aesara.tensor.math import abs_ +from aesara.tensor.math import max as tt_max +from aesara.tensor.math import min as tt_min +from aesara.tensor.type import discrete_dtypes try: from pygpu.gpuarray import GpuArray - from theano.gpuarray.type import GpuArrayType, _name_for_ctx + from aesara.gpuarray.type import GpuArrayType, _name_for_ctx pygpu_available = True except ImportError: pygpu_available = False -logger = logging.getLogger("theano.compile.nanguardmode") +logger = logging.getLogger("aesara.compile.nanguardmode") def _is_numeric_value(arr, var): @@ -33,8 +33,8 @@ def _is_numeric_value(arr, var): Parameters ---------- - arr : the data of that correspond to any Theano Variable - var : The corresponding Theano 
variable + arr : the data of that correspond to any Aesara Variable + var : The corresponding Aesara variable Returns ------- @@ -42,7 +42,7 @@ def _is_numeric_value(arr, var): `True` the value is non-numeric. """ - if isinstance(arr, theano.graph.type._cdata_type): + if isinstance(arr, aesara.graph.type._cdata_type): return False elif isinstance(arr, np.random.mtrand.RandomState): return False @@ -90,10 +90,10 @@ def contains_nan(arr, node=None, var=None): Parameters ---------- - arr : np.ndarray or output of any Theano op + arr : np.ndarray or output of any Aesara op node : None or an Apply instance. - If arr is the output of a Theano op, the node associated to it. - var : The Theano symbolic variable. + If arr is the output of an Aesara op, the node associated to it. + var : The Aesara symbolic variable. Returns ------- @@ -124,10 +124,10 @@ def contains_inf(arr, node=None, var=None): Parameters ---------- - arr : np.ndarray or output of any Theano op + arr : np.ndarray or output of any Aesara op node : None or an Apply instance. - If the output of a Theano op, the node associated to it. - var : The Theano symbolic variable. + If the output of an Aesara op, the node associated to it. + var : The Aesara symbolic variable. Returns ------- @@ -164,7 +164,7 @@ def result(inp): if f is None: guard_in = GpuArrayType(str(dtype), (False,), context_name=ctx_name)() mode = get_mode("FAST_RUN").including("gpuarray") - f = theano.function([guard_in], op(guard_in), mode=mode, profile=False) + f = aesara.function([guard_in], op(guard_in), mode=mode, profile=False) result.cache[key] = f return f(inp) @@ -179,7 +179,7 @@ def result(inp): class NanGuardMode(Mode): """ - A Theano compilation Mode that makes the compiled function automatically + A Aesara compilation Mode that makes the compiled function automatically detect NaNs and Infs and detect an error if they occur. Parameters @@ -228,9 +228,9 @@ def do_check_on(value, nd, var=None): ---------- value : numpy.ndarray The value to be checked. - nd : theano.graph.basic.Apply + nd : aesara.graph.basic.Apply The Apply node being executed. - var : theano.graph.basic.Variable + var : aesara.graph.basic.Variable Not used if nd is there. Otherwise, used to print the stack trace for inputs of the graph. 
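A minimal sketch of compiling with the `NanGuardMode` defined in this module; the failing input is illustrative, and the exception type follows the default `NanGuardMode__action='raise'` behaviour shown below:

    import aesara
    import aesara.tensor as at
    from aesara.compile.nanguardmode import NanGuardMode

    x = at.dscalar("x")
    mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    f = aesara.function([x], at.log(x), mode=mode)

    print(f(1.0))      # 0.0, nothing to report
    try:
        f(-1.0)        # log of a negative number produces NaN
    except AssertionError:
        print("NanGuardMode flagged a NaN in the graph")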
@@ -263,7 +263,7 @@ def do_check_on(value, nd, var=None): "output of a node in this variable:", file=sio, ) - print(theano.printing.debugprint(nd, file="str"), file=sio) + print(aesara.printing.debugprint(nd, file="str"), file=sio) else: print( "NanGuardMode found an error in an input of the " "graph.", @@ -272,7 +272,7 @@ def do_check_on(value, nd, var=None): # Add the stack trace if nd: var = nd.outputs[0] - print(theano.graph.utils.get_variable_trace_string(var), file=sio) + print(aesara.graph.utils.get_variable_trace_string(var), file=sio) msg = sio.getvalue() if config.NanGuardMode__action == "raise": raise AssertionError(msg) @@ -295,7 +295,7 @@ def nan_check_input(var, value): if getattr(var.tag, "nan_guard_mode_check", True): do_check_on(value, None, var=var) - wrap_linker = theano.link.vm.VMLinker( + wrap_linker = aesara.link.vm.VMLinker( callback=nan_check, callback_input=nan_check_input ) super().__init__(wrap_linker, optimizer=self.provided_optimizer) diff --git a/theano/compile/ops.py b/aesara/compile/ops.py similarity index 87% rename from theano/compile/ops.py rename to aesara/compile/ops.py index 24501212f0..ef98c75761 100644 --- a/theano/compile/ops.py +++ b/aesara/compile/ops.py @@ -9,21 +9,21 @@ import pickle import warnings -from theano.graph.basic import Apply -from theano.graph.op import COp, Op -from theano.graph.type import CType +from aesara.graph.basic import Apply +from aesara.graph.op import COp, Op +from aesara.graph.type import CType def register_view_op_c_code(type, code, version=()): """ - Tell ViewOp how to generate C code for a Theano Type. + Tell ViewOp how to generate C code for an Aesara Type. Parameters ---------- - type : Theano type - It must be the Theano class itself and not an instance of the class. + type : Aesara type + It must be the Aesara class itself and not an instance of the class. code : C code - Returns a view for the Theano type 'type'. Use %(iname)s and %(oname)s + Returns a view for the Aesara type 'type'. Use %(iname)s and %(oname)s for the input and output C variable names respectively. version A number indicating the version of the code, for cache. @@ -34,7 +34,7 @@ def register_view_op_c_code(type, code, version=()): class ViewOp(COp): """ - Returns an inplace view of the input. Used internally by Theano. + Returns an inplace view of the input. Used internally by Aesara. """ @@ -101,15 +101,15 @@ def grad(self, args, g_outs): class OutputGuard(ViewOp): """ - This op is used only internally by Theano. + This op is used only internally by Aesara. Only the AddDestroyHandler optimizer tries to insert them in the graph. This Op is declared as destructive while it is not destroying anything. It returns a view. This is used to prevent destruction of the output - variables of a Theano function. + variables of an Aesara function. - There is a mechanism in Theano that should prevent this, but the use + There is a mechanism in Aesara that should prevent this, but the use of OutputGuard adds a safeguard: it may be possible for some optimization run before the add_destroy_handler phase to bypass this mechanism, by making in-place optimizations. @@ -128,14 +128,14 @@ class OutputGuard(ViewOp): def register_deep_copy_op_c_code(typ, code, version=()): """ - Tell DeepCopyOp how to generate C code for a Theano Type. + Tell DeepCopyOp how to generate C code for an Aesara Type. Parameters ---------- - typ : Theano type - It must be the Theano class itself and not an instance of the class. 
+ typ : Aesara type + It must be the Aesara class itself and not an instance of the class. code: C code - Deep copies the Theano type 'typ'. Use %(iname)s and %(oname)s for the + Deep copies the Aesara type 'typ'. Use %(iname)s and %(oname)s for the input and output C variable names respectively. version A number indicating the version of the code, for cache. @@ -209,7 +209,7 @@ def c_code(self, node, name, inames, onames, sub): deep_copy_op = DeepCopyOp() -# List of Theano Types that one can add an extra dimension and for which +# List of Aesara Types that one can add an extra dimension and for which # Scan can deal with. expandable_types = () @@ -225,14 +225,14 @@ def load_back(mod, name): class FromFunctionOp(Op): """ - Build a basic Theano Op around a function. + Build a basic Aesara Op around a function. Since the resulting Op is very basic and is missing most of the optional functionalities, some optimizations may not apply. If you want to help, you can supply an infer_shape function that computes the shapes of the output given the shapes of the inputs. - Also the gradient is undefined in the resulting op and Theano will + Also the gradient is undefined in the resulting op and Aesara will raise an error if you attempt to get the gradient of a graph containing this op. @@ -285,7 +285,7 @@ def _infer_shape(self, fgraph, node, input_shapes): def as_op(itypes, otypes, infer_shape=None): """ - Decorator that converts a function into a basic Theano op that will call + Decorator that converts a function into a basic Aesara op that will call the supplied function as its implementation. It takes an optional infer_shape parameter that should be a callable with @@ -303,8 +303,8 @@ def infer_shape(fgraph, node, input_shapes): Examples -------- - @as_op(itypes=[theano.tensor.fmatrix, theano.tensor.fmatrix], - otypes=[theano.tensor.fmatrix]) + @as_op(itypes=[aesara.tensor.fmatrix, aesara.tensor.fmatrix], + otypes=[aesara.tensor.fmatrix]) def numpy_dot(a, b): return numpy.dot(a, b) @@ -312,11 +312,11 @@ def numpy_dot(a, b): if not isinstance(itypes, (list, tuple)): itypes = [itypes] if any(not isinstance(t, CType) for t in itypes): - raise TypeError("itypes has to be a list of Theano types") + raise TypeError("itypes has to be a list of Aesara types") if not isinstance(otypes, (list, tuple)): otypes = [otypes] if any(not isinstance(t, CType) for t in otypes): - raise TypeError("otypes has to be a list of Theano types") + raise TypeError("otypes has to be a list of Aesara types") # make sure they are lists and not tuples itypes = list(itypes) diff --git a/theano/compile/profiling.py b/aesara/compile/profiling.py similarity index 95% rename from theano/compile/profiling.py rename to aesara/compile/profiling.py index ffc06b4aac..c592012463 100644 --- a/theano/compile/profiling.py +++ b/aesara/compile/profiling.py @@ -20,9 +20,9 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable __authors__ = "James Bergstra" "PyMC Developers" @@ -30,9 +30,9 @@ __docformat__ = "restructuredtext en" -logger = logging.getLogger("theano.compile.profiling") +logger = logging.getLogger("aesara.compile.profiling") -theano_imported_time = time.time() +aesara_imported_time = time.time() total_fct_exec_time = 0.0 total_graph_opt_time = 0.0 total_time_linker = 0.0 @@ -138,9 +138,9 @@ def _atexit_print_fn(): def print_global_stats(): """ Print 
the following stats: - -- Time elapsed since Theano was imported - -- Time spent inside Theano functions - -- Time spent in compiling Theano functions + -- Time elapsed since Aesara was imported + -- Time spent inside Aesara functions + -- Time spent in compiling Aesara functions -- on graph optimization -- on linker """ @@ -156,9 +156,9 @@ def print_global_stats(): print( ( "Global stats: ", - f"Time elasped since Theano import = {time.time() - theano_imported_time:6.3f}s, " - f"Time spent in Theano functions = {total_fct_exec_time:6.3f}s, " - "Time spent compiling Theano functions: " + f"Time elasped since Aesara import = {time.time() - aesara_imported_time:6.3f}s, " + f"Time spent in Aesara functions = {total_fct_exec_time:6.3f}s, " + "Time spent compiling Aesara functions: " f" optimization = {total_graph_opt_time:6.3f}s, linker = {total_time_linker:6.3f}s ", ), file=destination_file, @@ -178,7 +178,7 @@ class ProfileStats: """ Object to store runtime and memory profiling information for all of - Theano's operations: compilation, optimization, execution. + Aesara's operations: compilation, optimization, execution. Parameters ---------- @@ -289,12 +289,12 @@ def __init__( ): if ( gpu_checks - and (hasattr(theano, "gpuarray") and theano.gpuarray.pygpu_activated) + and (hasattr(aesara, "gpuarray") and aesara.gpuarray.pygpu_activated) and os.environ.get("CUDA_LAUNCH_BLOCKING", "0") != "1" ): msg = ( - "You are running the Theano profiler with CUDA enabled." - " Theano GPU ops execution is asynchronous by default." + "You are running the Aesara profiler with CUDA enabled." + " Aesara GPU ops execution is asynchronous by default." " So by default, the profile is useless." " You must set the environment variable" " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to" @@ -308,12 +308,12 @@ def __init__( if ( config.profile and gpu_checks - and hasattr(theano, "gpuarray") - and theano.gpuarray.pygpu_activated + and hasattr(aesara, "gpuarray") + and aesara.gpuarray.pygpu_activated and not config.profiling__ignore_first_call ): warnings.warn( - "Theano flag profiling__ignore_first_call is False. " + "Aesara flag profiling__ignore_first_call is False. " "This cause bad profiling result in the gpu " "back-end, as sometimes we compile at the first call." 
) @@ -548,8 +548,8 @@ def summary_class(self, file=sys.stderr, N=None): tot += t ftot = tot * 100 / local_time # Remove the useless start and end of the class name: - # "" -> - # "theano.gpuarray.blas.GpuDot22" + # "" -> + # "aesara.gpuarray.blas.GpuDot22" class_name = str(a)[8:-2][:maxlen] print( format_str @@ -831,11 +831,11 @@ def summary_function(self, file): ) print(f" Total compile time: {self.compile_time:e}s", file=file) print(f" Number of Apply nodes: {int(self.nb_nodes)}", file=file) - print(f" Theano Optimizer time: {self.optimizer_time:e}s", file=file) - print(f" Theano validate time: {self.validate_time:e}s", file=file) + print(f" Aesara Optimizer time: {self.optimizer_time:e}s", file=file) + print(f" Aesara validate time: {self.validate_time:e}s", file=file) print( ( - " Theano Linker time (includes C, CUDA code " + " Aesara Linker time (includes C, CUDA code " f"generation/compiling): {self.linker_time}s" ), file=file, @@ -857,11 +857,11 @@ def summary_function(self, file): def summary_globals(self, file): print( - f"Time in all call to theano.grad() {theano.gradient.grad_time:e}s", + f"Time in all call to aesara.grad() {aesara.gradient.grad_time:e}s", file=file, ) - total_time = time.time() - theano_imported_time - print(f"Time since theano import {total_time:.3f}s", file=file) + total_time = time.time() - aesara_imported_time + print(f"Time since aesara import {total_time:.3f}s", file=file) def summary_memory(self, file, N=None): fct_memory = {} # fgraph->dict(node->[outputs size]) @@ -927,7 +927,7 @@ def count_running_memory(order, fgraph, nodes_mem, ignore_dmap=False): new allocation. """ - from theano.gpuarray import GpuArrayType + from aesara.gpuarray import GpuArrayType # Initial Mem info values [CPU, GPU] node_memory_size = [0, 0] @@ -1022,7 +1022,7 @@ def count_running_memory(order, fgraph, nodes_mem, ignore_dmap=False): running_max_memory_size[1], running_memory_size[1] ) - # Mimic the combination of Theano and Python gc + # Mimic the combination of Aesara and Python gc for ins in set(node.inputs): assert not (ins in view_of and viewed_by[ins]) # we trac the original var, so this shouldn't happen @@ -1168,7 +1168,7 @@ def min_memory_generator(executable_nodes, viewed_by, view_of): mem_count += mem_created max_mem_count = max(max_mem_count, mem_count) - # Mimic the combination of Theano and Python gc. + # Mimic the combination of Aesara and Python gc. 
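To actually see the summaries this class produces, profiling can be requested per function; a hedged sketch, assuming the compiled function exposes its `ProfileStats` instance as `f.profile` (sizes and names are illustrative):

    import numpy as np

    import aesara
    import aesara.tensor as at

    x = at.dmatrix("x")
    f = aesara.function([x], at.exp(x).sum(), profile=True)

    for _ in range(10):
        f(np.random.rand(100, 100))

    # Prints the per-Op/per-Apply timings, compile time and, when shape
    # information was collected, the memory summary described above.
    f.profile.summary()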
for ins in node.inputs: assert not (ins in view_of and viewed_by[ins]) # We track of the original var, so this shouldn't @@ -1376,7 +1376,7 @@ def print_stats(stats1, stats2): print(" Max peak memory with current setting", file=file) print_stats(stats[0], stats[2]) print( - " Max peak memory with current setting and Theano flag optimizer_excluding=inplace", + " Max peak memory with current setting and Aesara flag optimizer_excluding=inplace", file=file, ) print_stats(stats[1], stats[3]) @@ -1493,7 +1493,7 @@ def summary(self, file=sys.stderr, n_ops_to_print=20, n_apply_to_print=20): ) if config.profiling__debugprint: fcts = {fgraph for (fgraph, n) in self.apply_time.keys()} - theano.printing.debugprint(fcts, print_type=True) + aesara.printing.debugprint(fcts, print_type=True) if self.variable_shape or self.variable_strides: self.summary_memory(file, n_apply_to_print) if self.optimizer_profile: @@ -1511,11 +1511,11 @@ def print_tips(self, file): file=file, ) - from theano import scalar as ts - from theano.tensor.elemwise import Elemwise - from theano.tensor.math import Dot - from theano.tensor.nnet.sigm import ScalarSigmoid, ScalarSoftplus - from theano.tensor.random.op import RandomVariable + from aesara import scalar as ts + from aesara.tensor.elemwise import Elemwise + from aesara.tensor.math import Dot + from aesara.tensor.nnet.sigm import ScalarSigmoid, ScalarSoftplus + from aesara.tensor.random.op import RandomVariable scalar_op_amdlibm_no_speed_up = [ ts.LT, @@ -1612,7 +1612,7 @@ def exp_float32_op(op): printed_tip = False # tip 1 if config.floatX == "float64": - print(" - Try the Theano flag floatX=float32", file=file) + print(" - Try the Aesara flag floatX=float32", file=file) printed_tip = True # tip 2 @@ -1620,7 +1620,7 @@ def exp_float32_op(op): [amdlibm_speed_up(a.op) for (fgraph, a) in self.apply_time] ): print( - " - Try installing amdlibm and set the Theano flag " + " - Try installing amdlibm and set the Aesara flag " "lib__amblibm=True. This speeds up only some Elemwise " "operation.", file=file, @@ -1636,8 +1636,8 @@ def exp_float32_op(op): ): print( " - With the default gcc libm, exp in float32 is slower " - "than in float64! Try Theano flag floatX=float64, or " - "install amdlibm and set the theano flags lib__amblibm=True", + "than in float64! Try Aesara flag floatX=float64, or " + "install amdlibm and set the aesara flags lib__amblibm=True", file=file, ) printed_tip = True @@ -1666,7 +1666,7 @@ def exp_float32_op(op): printed_tip = True print( " - Replace the default random number generator by " - "'from theano.sandbox.rng_mrg import MRG_RandomStream " + "'from aesara.sandbox.rng_mrg import MRG_RandomStream " "as RandomStream', as this is is faster. 
It is still " "experimental, but seems to work correctly.", file=file, @@ -1694,21 +1694,21 @@ def exp_float32_op(op): printed_tip = True # tip 7 - import theano.gpuarray - import theano.tensor.signal.pool as pool - from theano.tensor.nnet.basic import LogSoftmax + import aesara.gpuarray + import aesara.tensor.signal.pool as pool + from aesara.tensor.nnet.basic import LogSoftmax for (fgraph, a) in self.apply_time: node = a if isinstance(node.op, pool.Pool): - if not theano.gpuarray.dnn.dnn_present(): + if not aesara.gpuarray.dnn.dnn_present(): print( "Install CuDNN to do pooling faster" "this allows the operation to run on GPU" ) printed_tip = True if isinstance(node.op, LogSoftmax): - if not theano.gpuarray.dnn.dnn_present(): + if not aesara.gpuarray.dnn.dnn_present(): print( "Install CuDNN to do LogSoftmax faster" "this allows the operation to run on GPU" diff --git a/theano/compile/sharedvalue.py b/aesara/compile/sharedvalue.py similarity index 91% rename from theano/compile/sharedvalue.py rename to aesara/compile/sharedvalue.py index f549807c43..90fa211dea 100644 --- a/theano/compile/sharedvalue.py +++ b/aesara/compile/sharedvalue.py @@ -1,5 +1,5 @@ """ -Provide a simple user friendly API to Theano-managed memory. +Provide a simple user friendly API to Aesara-managed memory. """ @@ -8,13 +8,13 @@ import numpy as np -from theano.graph.basic import Variable -from theano.graph.type import generic -from theano.graph.utils import add_tag_trace -from theano.link.basic import Container +from aesara.graph.basic import Variable +from aesara.graph.type import generic +from aesara.graph.utils import add_tag_trace +from aesara.link.basic import Container -_logger = logging.getLogger("theano.compile.sharedvalue") +_logger = logging.getLogger("aesara.compile.sharedvalue") __docformat__ = "restructuredtext en" @@ -137,7 +137,7 @@ def set_value(self, new_value, borrow=False): subtensor of it). It is also worth mentioning that, for efficient transfer to the GPU, - Theano will make the new data ``c_contiguous``. This can require an + Aesara will make the new data ``c_contiguous``. This can require an extra copy of the data on the host. The inplace on gpu memory work when borrow is either True or False. @@ -186,20 +186,20 @@ def __getitem__(self, *args): # __getitem__ is not available for generic SharedVariable objects. # We raise a TypeError like Python would do if __getitem__ was not # implemented at all, but with a more explicit error message to help - # Theano users figure out the root of the problem more easily. + # Aesara users figure out the root of the problem more easily. value = self.get_value(borrow=True) if isinstance(value, np.ndarray): # Array probably had an unknown dtype. msg = ( f"a Numpy array with dtype: '{value.dtype}'. This data type is not " - "currently recognized by Theano tensors: please cast " + "currently recognized by Aesara tensors: please cast " "your data into a supported numeric type if you need " - "Theano tensor functionalities." + "Aesara tensor functionalities." ) else: msg = ( f"an object of type: {type(value)}. Did you forget to cast it into " - "a Numpy array before calling theano.shared()?" + "a Numpy array before calling aesara.shared()?" ) raise TypeError( @@ -246,7 +246,7 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): This function is meant as a convenient default. If you want to use a specific shared variable constructor, consider calling it directly. - ``theano.shared`` is a shortcut to this function. 
+ ``aesara.shared`` is a shortcut to this function. .. attribute:: constructors @@ -259,8 +259,6 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): to those that can accept those kwargs. Some shared variable have ``borrow`` as extra kwargs. - `See `_ for details. Some shared variable have ``broadcastable`` as extra kwargs. As shared variable shapes can change, all dimensions default to not being @@ -297,9 +295,7 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): # were supplied, the user didn't want them to be ignored. except MemoryError as e: - e.args = e.args + ( - "you might consider" " using 'theano.shared(..., borrow=True)'", - ) + e.args = e.args + ("Consider using `aesara.shared(..., borrow=True)`",) raise raise TypeError( diff --git a/theano/configdefaults.py b/aesara/configdefaults.py similarity index 94% rename from theano/configdefaults.py rename to aesara/configdefaults.py index 2be8f553a9..bc898ea0cc 100644 --- a/theano/configdefaults.py +++ b/aesara/configdefaults.py @@ -10,9 +10,9 @@ import numpy as np -import theano -import theano.configparser -from theano.configparser import ( +import aesara +import aesara.configparser +from aesara.configparser import ( BoolParam, ConfigParam, ContextsParam, @@ -22,7 +22,7 @@ IntParam, StrParam, ) -from theano.utils import ( +from aesara.utils import ( LOCAL_BITWIDTH, PYTHON_INT_BITWIDTH, call_subprocess_Popen, @@ -31,7 +31,7 @@ ) -_logger = logging.getLogger("theano.configdefaults") +_logger = logging.getLogger("aesara.configdefaults") def get_cuda_root(): @@ -110,15 +110,15 @@ def _filter_mode(val): ] if val in str_options: return val - # This can be executed before Theano is completly imported, so - # theano.compile.mode.Mode is not always available. - # Instead of isinstance(val, theano.compile.mode.Mode), + # This can be executed before Aesara is completly imported, so + # aesara.compile.mode.Mode is not always available. + # Instead of isinstance(val, aesara.compile.mode.Mode), # we can inspect the __mro__ of the object! for type_ in type(val).__mro__: - if "theano.compile.mode.Mode" in str(type_): + if "aesara.compile.mode.Mode" in str(type_): return val raise ValueError( - f"Expected one of {str_options}, or an instance of theano.compile.mode.Mode. " + f"Expected one of {str_options}, or an instance of aesara.compile.mode.Mode. " f"Instead got: {val}." ) @@ -204,7 +204,7 @@ def short_platform(r=None, p=None): """ Return a safe shorter version of platform.platform(). - The old default Theano compiledir used platform.platform in + The old default Aesara compiledir used platform.platform in it. This use the platform.version() as a substring. This is too specific as it contain the full kernel number and package version. This cause the compiledir to change each time there is a @@ -466,7 +466,7 @@ def add_basic_configvars(): config.add( "unpickle_function", ( - "Replace unpickled Theano functions with None. " + "Replace unpickled Aesara functions with None. 
" "This is useful to unpickle old graphs that pickled" " them when it shouldn't" ), @@ -476,7 +476,7 @@ def add_basic_configvars(): config.add( "reoptimize_unpickled_function", - "Re-optimize the graph when a theano function is unpickled from the disk.", + "Re-optimize the graph when an Aesara function is unpickled from the disk.", BoolParam(False, mutable=True), in_c_key=False, ) @@ -663,7 +663,7 @@ def add_compile_configvars(): # Keep the default linker the same as the one for the mode FAST_RUN config.add( "linker", - "Default linker used if the theano flags mode is Mode", + "Default linker used if the aesara flags mode is Mode", EnumStr( "cvm", ["c|py", "py", "c", "c|py_nogc", "vm", "vm_nogc", "cvm_nogc"] ), @@ -674,24 +674,24 @@ def add_compile_configvars(): # linker should default to python only. config.add( "linker", - "Default linker used if the theano flags mode is Mode", + "Default linker used if the aesara flags mode is Mode", EnumStr("vm", ["py", "vm_nogc"]), in_c_key=False, ) if type(config).cxx.is_default: # If the user provided an empty value for cxx, do not warn. _logger.warning( - "g++ not detected ! Theano will be unable to execute " + "g++ not detected ! Aesara will be unable to execute " "optimized C-implementations (for both CPU and GPU) and will " "default to Python implementations. Performance will be severely " - "degraded. To remove this warning, set Theano flags cxx to an " + "degraded. To remove this warning, set Aesara flags cxx to an " "empty string." ) # Keep the default value the same as the one for the mode FAST_RUN config.add( "allow_gc", - "Do we default to delete intermediate results during Theano" + "Do we default to delete intermediate results during Aesara" " function calls? Doing so lowers the memory requirement, but" " asks that we reallocate memory at the next function call." " This is implemented for the default linker, but may not work" @@ -738,7 +738,7 @@ def add_compile_configvars(): config.add( "on_unused_input", "What to do if a variable in the 'inputs' list of " - " theano.function() is not used in the graph.", + " aesara.function() is not used in the graph.", EnumStr("raise", ["warn", "ignore"]), in_c_key=False, ) @@ -764,7 +764,7 @@ def add_compile_configvars(): config.add( "cmodule__remove_gxx_opt", "If True, will remove the -O* parameter passed to g++." - "This is useful to debug in gdb modules compiled by Theano." + "This is useful to debug in gdb modules compiled by Aesara." "The parameter -g is passed by default to g++", BoolParam(False), # TODO: change so that this isn't needed. @@ -788,7 +788,7 @@ def add_compile_configvars(): config.add( "cmodule__age_thresh_use", - "In seconds. The time after which " "Theano won't reuse a compile c module.", + "In seconds. The time after which " "Aesara won't reuse a compile c module.", # 24 days IntParam(60 * 60 * 24 * 24, mutable=False), in_c_key=False, @@ -835,13 +835,13 @@ def _is_valid_cmp_sloppy(v): def add_tensor_configvars(): - # This flag is used when we import Theano to initialize global variables. + # This flag is used when we import Aesara to initialize global variables. # So changing it after import will not modify these global variables. # This could be done differently... but for now we simply prevent it from being # changed at runtime. 
config.add( "tensor__cmp_sloppy", - "Relax theano.tensor.math._allclose (0) not at all, (1) a bit, (2) more", + "Relax aesara.tensor.math._allclose (0) not at all, (1) a bit, (2) more", IntParam(0, _is_valid_cmp_sloppy, mutable=False), in_c_key=False, ) @@ -879,9 +879,9 @@ def add_traceback_configvars(): "The number of stack to trace. -1 mean all.", # We default to a number to be able to know where v1 + v2 is created in the # user script. The bigger this number is, the more run time it takes. - # We need to default to 8 to support theano.tensor.type.tensor(...). - # import theano, numpy - # X = theano.tensor.matrix() + # We need to default to 8 to support aesara.tensor.type.tensor(...). + # import aesara, numpy + # X = aesara.tensor.matrix() # y = X.reshape((5,3,1)) # assert y.tag.trace IntParam(8), @@ -891,7 +891,7 @@ def add_traceback_configvars(): config.add( "traceback__compile_limit", "The number of stack to trace to keep during compilation. -1 mean all." - " If greater then 0, will also make us save Theano internal stack trace.", + " If greater then 0, will also make us save Aesara internal stack trace.", IntParam(0), in_c_key=False, ) @@ -942,9 +942,9 @@ def add_error_and_warning_configvars(): config.add( "warn__ignore_bug_before", ( - "If 'None', we warn about all Theano bugs found by default. " - "If 'all', we don't warn about Theano bugs found by default. " - "If a version, we print only the warnings relative to Theano " + "If 'None', we warn about all Aesara bugs found by default. " + "If 'all', we don't warn about Aesara bugs found by default. " + "If a version, we print only the warnings relative to Aesara " "bugs found after that version. " "Warning for specific bugs can be configured with specific " "[warn] flags." @@ -1009,7 +1009,7 @@ def add_testvalue_and_checking_configvars(): config.add( "print_test_value", ( - "If 'True', the __eval__ of a Theano variable will return its test_value " + "If 'True', the __eval__ of an Aesara variable will return its test_value " "when this is available. This has the practical conseguence that, e.g., " "in debugging `my_var` will print the same as `my_var.tag.test_value` " "when a test value is defined." @@ -1021,7 +1021,7 @@ def add_testvalue_and_checking_configvars(): config.add( "compute_test_value", ( - "If 'True', Theano will run each op at graph build time, using " + "If 'True', Aesara will run each op at graph build time, using " "Constants, SharedVariables and the tag 'test_value' as inputs " "to the function. This helps the user track down problems in the " "graph before it gets optimized." @@ -1033,9 +1033,9 @@ def add_testvalue_and_checking_configvars(): config.add( "compute_test_value_opt", ( - "For debugging Theano optimization only." + "For debugging Aesara optimization only." " Same as compute_test_value, but is used" - " during Theano optimization" + " during Aesara optimization" ), EnumStr("off", ["ignore", "warn", "raise", "pdb"]), in_c_key=False, @@ -1215,7 +1215,7 @@ def add_testvalue_and_checking_configvars(): config.add( "profiling__ignore_first_call", - """Do we ignore the first call of a Theano function.""", + """Do we ignore the first call of an Aesara function.""", BoolParam(False), in_c_key=False, ) @@ -1249,7 +1249,7 @@ def add_multiprocessing_configvars(): " We disable openmp by default. To remove this" " warning, set the environment variable" " OMP_NUM_THREADS to the number of threads you" - " want theano to use." + " want aesara to use." 
) default_openmp = count > 1 @@ -1263,13 +1263,13 @@ def add_multiprocessing_configvars(): "Allow (or not) parallel computation on the CPU with OpenMP. " "This is the default value used when creating an Op that " "supports OpenMP parallelization. It is preferable to define it " - "via the Theano configuration file ~/.theanorc or with the " - "environment variable THEANO_FLAGS. Parallelization is only " + "via the Aesara configuration file ~/.aesararc or with the " + "environment variable AESARA_FLAGS. Parallelization is only " "done for some operations that implement it, and even for " "operations that implement parallelism, each operation is free " "to respect this flag or not. You can control the number of " "threads used with the environment variable OMP_NUM_THREADS." - " If it is set to 1, we disable openmp in Theano by default.", + " If it is set to 1, we disable openmp in Aesara by default.", BoolParam(default_openmp), in_c_key=False, ) @@ -1438,7 +1438,7 @@ def add_deprecated_configvars(): # TODO: remove? config.add( "warn__identify_1pexp_bug", - "Warn if Theano versions prior to 7987b51 (2011-12-18) could have " + "Warn if Aesara versions prior to 7987b51 (2011-12-18) could have " "yielded a wrong result due to a bug in the is_1pexp function", BoolParam(_warn_default("0.4.1")), in_c_key=False, @@ -1464,8 +1464,8 @@ def add_deprecated_configvars(): config.add( "warn__argmax_pushdown_bug", ( - "Warn if in past version of Theano we generated a bug with the " - "theano.tensor.nnet.basic.local_argmax_pushdown optimization. " + "Warn if in past version of Aesara we generated a bug with the " + "aesara.tensor.nnet.basic.local_argmax_pushdown optimization. " "Was fixed 27 may 2010" ), BoolParam(_warn_default("0.3")), @@ -1475,7 +1475,7 @@ def add_deprecated_configvars(): config.add( "warn__gpusum_01_011_0111_bug", ( - "Warn if we are in a case where old version of Theano had a " + "Warn if we are in a case where old version of Aesara had a " "silent bug with GpuSum pattern 01,011 and 0111 when the first " "dimensions was bigger then 4096. Was fixed 31 may 2010" ), @@ -1486,7 +1486,7 @@ def add_deprecated_configvars(): config.add( "warn__sum_sum_bug", ( - "Warn if we are in a case where Theano version between version " + "Warn if we are in a case where Aesara version between version " "9923a40c7b7a and the 2 august 2010 (fixed date), generated an " "error in that case. This happens when there are 2 consecutive " "sums in the graph, bad code was generated. " @@ -1499,7 +1499,7 @@ def add_deprecated_configvars(): config.add( "warn__sum_div_dimshuffle_bug", ( - "Warn if previous versions of Theano (between rev. " + "Warn if previous versions of Aesara (between rev. " "3bd9b789f5e8, 2010-06-16, and cfc6322e5ad4, 2010-08-03) " "would have given incorrect result. This bug was triggered by " "sum of division of dimshuffled tensors." 
@@ -1510,7 +1510,7 @@ def add_deprecated_configvars(): config.add( "warn__subtensor_merge_bug", - "Warn if previous versions of Theano (before 0.5rc2) could have given " + "Warn if previous versions of Aesara (before 0.5rc2) could have given " "incorrect results when indexing into a subtensor with negative " "stride (for instance, for instance, x[a:b:-1][c]).", BoolParam(_warn_default("0.5")), @@ -1519,7 +1519,7 @@ def add_deprecated_configvars(): config.add( "warn__gpu_set_subtensor1", - "Warn if previous versions of Theano (before 0.6) could have given " + "Warn if previous versions of Aesara (before 0.6) could have given " "incorrect results when moving to the gpu " "set_subtensor(x[int vector], new_value)", BoolParam(_warn_default("0.6")), @@ -1528,12 +1528,12 @@ def add_deprecated_configvars(): config.add( "warn__vm_gc_bug", - "There was a bug that existed in the default Theano configuration," + "There was a bug that existed in the default Aesara configuration," " only in the development version between July 5th 2012" " and July 30th 2012. This was not in a released version." " If your code was affected by this bug, a warning" " will be printed during the code execution if you use the" - " `linker=vm,vm__lazy=True,warn__vm_gc_bug=True` Theano flags." + " `linker=vm,vm__lazy=True,warn__vm_gc_bug=True` Aesara flags." " This warning is disabled by default as the bug was not released.", BoolParam(False), in_c_key=False, @@ -1552,10 +1552,10 @@ def add_deprecated_configvars(): config.add( "warn__reduce_join", ( - "Your current code is fine, but Theano versions " + "Your current code is fine, but Aesara versions " "prior to 0.7 (or this development version) " "might have given an incorrect result. " - "To disable this warning, set the Theano flag " + "To disable this warning, set the Aesara flag " "warn__reduce_join to False. The problem was an " "optimization, that modified the pattern " '"Reduce{scalar.op}(Join(axis=0, a, b), axis=0)", ' @@ -1569,7 +1569,7 @@ def add_deprecated_configvars(): config.add( "warn__inc_set_subtensor1", ( - "Warn if previous versions of Theano (before 0.7) could have " + "Warn if previous versions of Aesara (before 0.7) could have " "given incorrect results for inc_subtensor and set_subtensor " "when using some patterns of advanced indexing (indexing with " "one vector or matrix of ints)." @@ -1589,7 +1589,7 @@ def add_deprecated_configvars(): config.add( "warn__inc_subtensor1_opt", - "Warn if previous versions of Theano (before 0.10) could have " + "Warn if previous versions of Aesara (before 0.10) could have " "given incorrect results when computing " "inc_subtensor(zeros[idx], x)[idx], when idx is an array of integers " "with duplicated values.", @@ -1657,7 +1657,7 @@ def _filter_compiledir(path): try: os.makedirs(path, 0o770) # read-write-execute for user and group except OSError as e: - # Maybe another parallel execution of theano was trying to create + # Maybe another parallel execution of aesara was trying to create # the same directory at the same time. if e.errno != errno.EEXIST: raise ValueError( @@ -1706,7 +1706,7 @@ def _get_home_dir(): "python_version": platform.python_version(), "python_bitwidth": LOCAL_BITWIDTH, "python_int_bitwidth": PYTHON_INT_BITWIDTH, - "theano_version": theano.__version__, + "aesara_version": aesara.__version__, "numpy_version": np.__version__, "gxx_version": "xxx", "hostname": socket.gethostname(), @@ -1747,9 +1747,9 @@ def add_caching_dir_configvars(): # part of the roaming part of the user profile. 
Instead we use the local part # of the user profile, when available. if sys.platform == "win32" and os.getenv("LOCALAPPDATA") is not None: - default_base_compiledir = os.path.join(os.getenv("LOCALAPPDATA"), "Theano") + default_base_compiledir = os.path.join(os.getenv("LOCALAPPDATA"), "Aesara") else: - default_base_compiledir = os.path.join(_get_home_dir(), ".theano") + default_base_compiledir = os.path.join(_get_home_dir(), ".aesara") config.add( "base_compiledir", @@ -1779,7 +1779,7 @@ def add_caching_dir_configvars(): ) -# Those are the options provided by Theano to choose algorithms at runtime. +# Those are the options provided by Aesara to choose algorithms at runtime. SUPPORTED_DNN_CONV_ALGO_RUNTIME = ( "guess_once", "guess_on_shape_change", @@ -1787,7 +1787,7 @@ def add_caching_dir_configvars(): "time_on_shape_change", ) -# Those are the supported algorithm by Theano, +# Those are the supported algorithm by Aesara, # The tests will reference those lists. SUPPORTED_DNN_CONV_ALGO_FWD = ( "small", @@ -1825,13 +1825,13 @@ def add_caching_dir_configvars(): "float64", ) -# Eventually, the instance of `TheanoConfigParser` should be created right here, +# Eventually, the instance of `AesaraConfigParser` should be created right here, # where it is also populated with settings. But for a transition period, it # remains as `configparser._config`, while everybody accessing it through # `configparser.config` is flooded with deprecation warnings. These warnings -# instruct one to use `theano.config`, which is an alias for -# `theano.configdefaults.config`. -config = theano.configparser._config +# instruct one to use `aesara.config`, which is an alias for +# `aesara.configdefaults.config`. +config = aesara.configparser._config # The functions below register config variables into the config instance above. add_basic_configvars() diff --git a/theano/configparser.py b/aesara/configparser.py similarity index 87% rename from theano/configparser.py rename to aesara/configparser.py index 89a00e001e..df78b0a357 100644 --- a/theano/configparser.py +++ b/aesara/configparser.py @@ -14,13 +14,13 @@ from functools import wraps from io import StringIO -from theano.utils import deprecated, hash_from_code +from aesara.utils import deprecated, hash_from_code -_logger = logging.getLogger("theano.configparser") +_logger = logging.getLogger("aesara.configparser") -class TheanoConfigWarning(Warning): +class AesaraConfigWarning(Warning): def warn(cls, message, stacklevel=0): warnings.warn(message, cls, stacklevel=stacklevel + 3) @@ -67,7 +67,7 @@ def __exit__(self, *args): class _SectionRedirect: - """Functions as a mock property on the TheanoConfigParser. + """Functions as a mock property on the AesaraConfigParser. It redirects attribute access (to config subsectinos) to the new config variable properties that use "__" in their name. @@ -87,13 +87,13 @@ def __getattr__(self, attr): return getattr(self._root, f"{self._section_name}__{attr}") -class TheanoConfigParser: +class AesaraConfigParser: """ Object that holds configuration settings. 
""" - def __init__(self, flags_dict: dict, theano_cfg, theano_raw_cfg): + def __init__(self, flags_dict: dict, aesara_cfg, aesara_raw_cfg): self._flags_dict = flags_dict - self._theano_cfg = theano_cfg - self._theano_raw_cfg = theano_raw_cfg + self._aesara_cfg = aesara_cfg + self._aesara_raw_cfg = aesara_raw_cfg self._config_var_dict = {} super().__init__() @@ -134,7 +134,7 @@ def get_config_hash(self): ) def add(self, name, doc, configparam, in_c_key=True): - """Add a new variable to TheanoConfigParser. + """Add a new variable to AesaraConfigParser. This method performs some of the work of initializing `ConfigParam` instances. @@ -184,10 +184,10 @@ def add(self, name, doc, configparam, in_c_key=True): configparam.__get__(self, type(self), delete_key=True) except KeyError: # This is raised because the underlying `ConfigParser` in - # `self._theano_cfg` does not contain an entry for the given + # `self._aesara_cfg` does not contain an entry for the given # section and/or value. _logger.info( - f"Suppressed KeyError in TheanoConfigParser.add for parameter '{name}'!" + f"Suppressed KeyError in AesaraConfigParser.add for parameter '{name}'!" ) # the ConfigParam implements __get__/__set__, enabling us to create a property: @@ -211,8 +211,8 @@ def fetch_val_for_key(self, key, delete_key=False): An unsuccessful search raises a KeyError The (decreasing) priority order is: - - THEANO_FLAGS - - ~./theanorc + - AESARA_FLAGS + - ~./aesararc """ @@ -235,16 +235,16 @@ def fetch_val_for_key(self, key, delete_key=False): section, option = "global", key try: try: - return self._theano_cfg.get(section, option) + return self._aesara_cfg.get(section, option) except InterpolationError: - return self._theano_raw_cfg.get(section, option) + return self._aesara_raw_cfg.get(section, option) except (NoOptionError, NoSectionError): raise KeyError(key) def change_flags(self, *args, **kwargs) -> _ChangeFlagsDecorator: """ Use this as a decorator or context manager to change the value of - Theano config variables. + Aesara config variables. Useful during tests. """ @@ -252,7 +252,7 @@ def change_flags(self, *args, **kwargs) -> _ChangeFlagsDecorator: def warn_unused_flags(self): for key in self._flags_dict.keys(): - warnings.warn(f"Theano does not recognise this flag: {key}") + warnings.warn(f"Aesara does not recognise this flag: {key}") class ConfigParam: @@ -263,7 +263,7 @@ class ConfigParam: that can be context-dependent. This class implements __get__ and __set__ methods to eventually become - a property on an instance of TheanoConfigParser. + a property on an instance of AesaraConfigParser. """ def __init__( @@ -296,13 +296,13 @@ def __init__( self._validate = validate self._mutable = mutable self.is_default = True - # set by TheanoConfigParser.add: + # set by AesaraConfigParser.add: self.name = None self.doc = None self.in_c_key = None # Note that we do not call `self.filter` on the default value: this - # will be done automatically in TheanoConfigParser.add, potentially with a + # will be done automatically in AesaraConfigParser.add, potentially with a # more appropriate user-provided default value. # Calling `filter` here may actually be harmful if the default value is # invalid and causes a crash or has unwanted side effects. @@ -346,7 +346,7 @@ def __get__(self, cls, type_, delete_key=False): return self if self.name not in cls._config_var_dict: raise ConfigAccessViolation( - f"The config parameter '{self.name}' was registered on a different instance of the TheanoConfigParser." 
+ f"The config parameter '{self.name}' was registered on a different instance of the AesaraConfigParser." f" It is not accessible through the instance with id '{id(cls)}' because of safeguarding." ) if not hasattr(self, "val"): @@ -462,8 +462,8 @@ def _apply(self, val): elif val.startswith("gpu"): raise ValueError( "You are tring to use the old GPU back-end. " - "It was removed from Theano. Use device=cuda* now. " - "See https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29 " + "It was removed from Aesara. Use device=cuda* now. " + "See https://github.com/pymc-devs/aesara/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29 " "for more information." ) else: @@ -508,7 +508,7 @@ def parse_config_string(config_string, issue_warnings=True): kv_tuple = kv_pair.split("=", 1) if len(kv_tuple) == 1: if issue_warnings: - TheanoConfigWarning.warn( + AesaraConfigWarning.warn( f"Config key '{kv_tuple[0]}' has no value, ignoring it", stacklevel=1, ) @@ -519,32 +519,34 @@ def parse_config_string(config_string, issue_warnings=True): return config_dict -def config_files_from_theanorc(): +def config_files_from_aesararc(): """ - THEANORC can contain a colon-delimited list of config files, like - THEANORC=~lisa/.theanorc:~/.theanorc - In that case, definitions in files on the right (here, ~/.theanorc) have - precedence over those in files on the left. + AESARARC can contain a colon-delimited list of config files, like + + AESARARC=~/.aesararc:/etc/.aesararc + + In that case, definitions in files on the right (here, ``~/.aesararc``) + have precedence over those in files on the left. """ rval = [ os.path.expanduser(s) - for s in os.getenv("THEANORC", "~/.theanorc").split(os.pathsep) + for s in os.getenv("AESARARC", "~/.aesararc").split(os.pathsep) ] - if os.getenv("THEANORC") is None and sys.platform == "win32": + if os.getenv("AESARARC") is None and sys.platform == "win32": # to don't need to change the filename and make it open easily - rval.append(os.path.expanduser("~/.theanorc.txt")) + rval.append(os.path.expanduser("~/.aesararc.txt")) return rval def _create_default_config(): - # The THEANO_FLAGS environment variable should be a list of comma-separated + # The AESARA_FLAGS environment variable should be a list of comma-separated # [section__]option=value entries. If the section part is omitted, there should # be only one section that contains the given option. - THEANO_FLAGS = os.getenv("THEANO_FLAGS", "") - THEANO_FLAGS_DICT = parse_config_string(THEANO_FLAGS, issue_warnings=True) + AESARA_FLAGS = os.getenv("AESARA_FLAGS", "") + AESARA_FLAGS_DICT = parse_config_string(AESARA_FLAGS, issue_warnings=True) - config_files = config_files_from_theanorc() - theano_cfg = ConfigParser( + config_files = config_files_from_aesararc() + aesara_cfg = ConfigParser( { "USER": os.getenv("USER", os.path.split(os.path.expanduser("~"))[-1]), "LSCRATCH": os.getenv("LSCRATCH", ""), @@ -554,26 +556,26 @@ def _create_default_config(): "PID": str(os.getpid()), } ) - theano_cfg.read(config_files) + aesara_cfg.read(config_files) # Having a raw version of the config around as well enables us to pass # through config values that contain format strings. # The time required to parse the config twice is negligible. - theano_raw_cfg = RawConfigParser() - theano_raw_cfg.read(config_files) + aesara_raw_cfg = RawConfigParser() + aesara_raw_cfg.read(config_files) - # Instances of TheanoConfigParser can have independent current values! + # Instances of AesaraConfigParser can have independent current values! 
# But because the properties are assinged to the type, their existence is global. - config = TheanoConfigParser( - flags_dict=THEANO_FLAGS_DICT, - theano_cfg=theano_cfg, - theano_raw_cfg=theano_raw_cfg, + config = AesaraConfigParser( + flags_dict=AESARA_FLAGS_DICT, + aesara_cfg=aesara_cfg, + aesara_raw_cfg=aesara_raw_cfg, ) return config class _ConfigProxy: """Like _SectionRedirect this class enables backwards-compatible access to the - config settings, but raises DeprecationWarnings with instructions to use `theano.config`. + config settings, but raises DeprecationWarnings with instructions to use `aesara.config`. """ def __init__(self, actual): @@ -583,8 +585,8 @@ def __getattr__(self, attr): if attr == "_actual": return _ConfigProxy._actual warnings.warn( - "Accessing config through `theano.configparser.config` is deprecated. " - "Use `theano.config` instead.", + "Accessing config through `aesara.configparser.config` is deprecated. " + "Use `aesara.config` instead.", DeprecationWarning, stacklevel=2, ) @@ -594,8 +596,8 @@ def __setattr__(self, attr, value): if attr == "_actual": return setattr(_ConfigProxy._actual, attr, value) warnings.warn( - "Accessing config through `theano.configparser.config` is deprecated. " - "Use `theano.config` instead.", + "Accessing config through `aesara.configparser.config` is deprecated. " + "Use `aesara.config` instead.", DeprecationWarning, stacklevel=2, ) @@ -607,15 +609,15 @@ def __setattr__(self, attr, value): _config = _create_default_config() # The old API often imported the default config object from `configparser`. -# These imports/accesses should be replaced with `theano.config`, so this wraps +# These imports/accesses should be replaced with `aesara.config`, so this wraps # it with warnings: config = _ConfigProxy(_config) # We can't alias the methods of the `config` variable above without already # triggering the warning. Instead, we wrap the methods of the actual instance # with warnings: -change_flags = deprecated("Use theano.config.change_flags instead!")( +change_flags = deprecated("Use aesara.config.change_flags instead!")( _config.change_flags ) -_config_print = deprecated("Use theano.config.config_print instead!")( +_config_print = deprecated("Use aesara.config.config_print instead!")( _config.config_print ) diff --git a/aesara/d3viz/__init__.py b/aesara/d3viz/__init__.py new file mode 100644 index 0000000000..33c4e41e7b --- /dev/null +++ b/aesara/d3viz/__init__.py @@ -0,0 +1 @@ +from aesara.d3viz.d3viz import d3viz, d3write diff --git a/theano/d3viz/css/d3-context-menu.css b/aesara/d3viz/css/d3-context-menu.css similarity index 100% rename from theano/d3viz/css/d3-context-menu.css rename to aesara/d3viz/css/d3-context-menu.css diff --git a/theano/d3viz/css/d3viz.css b/aesara/d3viz/css/d3viz.css similarity index 100% rename from theano/d3viz/css/d3viz.css rename to aesara/d3viz/css/d3viz.css diff --git a/theano/d3viz/d3viz.py b/aesara/d3viz/d3viz.py similarity index 85% rename from theano/d3viz/d3viz.py rename to aesara/d3viz/d3viz.py index 0180b9a31a..8e56cba81d 100644 --- a/theano/d3viz/d3viz.py +++ b/aesara/d3viz/d3viz.py @@ -1,4 +1,4 @@ -"""Dynamic visualization of Theano graphs. +"""Dynamic visualization of Aesara graphs. 
Author: Christof Angermueller """ @@ -8,7 +8,7 @@ import os import shutil -from theano.d3viz.formatting import PyDotFormatter +from aesara.d3viz.formatting import PyDotFormatter __path__ = os.path.dirname(os.path.realpath(__file__)) @@ -42,7 +42,7 @@ def safe_json(obj): def d3viz(fct, outfile, copy_deps=True, *args, **kwargs): - """Create HTML file with dynamic visualizing of a Theano function graph. + """Create HTML file with dynamic visualizing of an Aesara function graph. In the HTML file, the whole graph or single nodes can be moved by drag and drop. Zooming is possible via the mouse wheel. Detailed information about @@ -60,8 +60,8 @@ def d3viz(fct, outfile, copy_deps=True, *args, **kwargs): Parameters ---------- - fct : theano.compile.function.types.Function - A compiled Theano function, variable, apply or a list of variables. + fct : aesara.compile.function.types.Function + A compiled Aesara function, variable, apply or a list of variables. outfile : str Path to output HTML file. copy_deps : bool, optional @@ -70,7 +70,7 @@ def d3viz(fct, outfile, copy_deps=True, *args, **kwargs): Notes ----- This function accepts extra parameters which will be forwarded to - :class:`theano.d3viz.formatting.PyDotFormatter`. + :class:`aesara.d3viz.formatting.PyDotFormatter`. """ @@ -115,19 +115,19 @@ def d3viz(fct, outfile, copy_deps=True, *args, **kwargs): def d3write(fct, path, *args, **kwargs): - """Convert Theano graph to pydot graph and write to dot file. + """Convert Aesara graph to pydot graph and write to dot file. Parameters ---------- - fct : theano.compile.function.types.Function - A compiled Theano function, variable, apply or a list of variables. + fct : aesara.compile.function.types.Function + A compiled Aesara function, variable, apply or a list of variables. path: str Path to output file Notes ----- This function accepts extra parameters which will be forwarded to - :class:`theano.d3viz.formatting.PyDotFormatter`. + :class:`aesara.d3viz.formatting.PyDotFormatter`. """ diff --git a/theano/d3viz/formatting.py b/aesara/d3viz/formatting.py similarity index 94% rename from theano/d3viz/formatting.py rename to aesara/d3viz/formatting.py index 331df152c0..196fa1de80 100644 --- a/theano/d3viz/formatting.py +++ b/aesara/d3viz/formatting.py @@ -1,4 +1,4 @@ -"""Functions for formatting Theano compute graphs. +"""Functions for formatting Aesara compute graphs. Author: Christof Angermueller """ @@ -7,21 +7,21 @@ import numpy as np -import theano -from theano.compile import Function, builders -from theano.graph.basic import Apply, Constant, Variable, graph_inputs -from theano.graph.fg import FunctionGraph -from theano.printing import pydot_imported, pydot_imported_msg +import aesara +from aesara.compile import Function, builders +from aesara.graph.basic import Apply, Constant, Variable, graph_inputs +from aesara.graph.fg import FunctionGraph +from aesara.printing import pydot_imported, pydot_imported_msg try: - from theano.printing import pd + from aesara.printing import pd except ImportError: pass class PyDotFormatter: - """Create `pydot` graph object from Theano function. + """Create `pydot` graph object from Aesara function. Parameters ---------- @@ -69,7 +69,7 @@ def __add_node(self, node): Parameters ---------- - node : Theano graph node + node : Aesara graph node Apply node, tensor variable, or shared variable in compute graph. 
Returns @@ -87,7 +87,7 @@ def __node_id(self, node): Parameters ---------- - node : Theano graph node + node : Aesara graph node Apply node, tensor variable, or shared variable in compute graph. Returns @@ -105,8 +105,8 @@ def __call__(self, fct, graph=None): Parameters ---------- - fct : theano.compile.function.types.Function - A compiled Theano function, variable, apply or a list of variables. + fct : aesara.compile.function.types.Function + A compiled Aesara function, variable, apply or a list of variables. graph: pydot.Dot `pydot` graph to which nodes are added. Creates new one if undefined. @@ -175,7 +175,7 @@ def __call__(self, fct, graph=None): } if isinstance(var, Constant): vparams["node_type"] = "constant_input" - elif isinstance(var, theano.tensor.sharedvar.TensorSharedVariable): + elif isinstance(var, aesara.tensor.sharedvar.TensorSharedVariable): vparams["node_type"] = "shared_input" vparams["dtype"] = type_to_str(var.type) vparams["tag"] = var_tag(var) diff --git a/theano/d3viz/html/template.html b/aesara/d3viz/html/template.html similarity index 100% rename from theano/d3viz/html/template.html rename to aesara/d3viz/html/template.html diff --git a/theano/d3viz/js/d3-context-menu.js b/aesara/d3viz/js/d3-context-menu.js similarity index 100% rename from theano/d3viz/js/d3-context-menu.js rename to aesara/d3viz/js/d3-context-menu.js diff --git a/theano/d3viz/js/d3.v3.min.js b/aesara/d3viz/js/d3.v3.min.js similarity index 100% rename from theano/d3viz/js/d3.v3.min.js rename to aesara/d3viz/js/d3.v3.min.js diff --git a/theano/d3viz/js/d3viz.js b/aesara/d3viz/js/d3viz.js similarity index 97% rename from theano/d3viz/js/d3viz.js rename to aesara/d3viz/js/d3viz.js index 1f4ff4344d..dbcfda40b2 100644 --- a/theano/d3viz/js/d3viz.js +++ b/aesara/d3viz/js/d3viz.js @@ -1,5 +1,5 @@ /* - * Theano javascript library for interactive visualization. + * Aesara javascript library for interactive visualization. * * Author: Christof Angermueller */ @@ -51,7 +51,7 @@ function flipAxes(nodes) { var node = nodes[i]; size[0] = Math.max(size[0], node.pos[0] + node.width); size[1] = Math.max(size[1], node.pos[1] + node.height); - } + } for (var i in nodes) { var node = nodes[i]; node.pos[1] = size[1] - (node.pos[1] + node.height); @@ -73,7 +73,7 @@ function processDotGraph(dotGraph) { dotGraph.rnodes[nodeId] = node; } } - + // Precompute attributes var i = 0; for (var nodeId in dotGraph.rnodes) { @@ -100,9 +100,9 @@ function processDotGraph(dotGraph) { node.subg_map_outputs = eval(node.subg_map_outputs) } } - + flipAxes(dotGraph.rnodes); - + // Offset and scale positions var posMin = [Infinity, Infinity]; for (var i in dotGraph.rnodes) { @@ -118,7 +118,7 @@ function processDotGraph(dotGraph) { pos[0] = 1.2 * pos[0]; pos[1] = 1.2 * pos[1]; } - + // Preprocess edges var edges = dotGraph.edges(); for (var i in edges) { @@ -157,12 +157,12 @@ function parseProfile(s) { * Preprocesses DOT nodes for front-end visualization. * Assigns all children of parent (root of graph if not specified) * to the same group and calls function recursively on children. 
- * + * */ function traverseChilds(dotGraph, nodes, groups, parent) { var preId = ''; var ref = undefined; - + // Create new group with parent as parent var group = {'id': groups.length, 'nodes': [], 'parent': parent}; if (exists(parent)) { @@ -172,7 +172,7 @@ function traverseChilds(dotGraph, nodes, groups, parent) { parent.group = group; } groups.push(group); - + // Loop over all children var childs = dotGraph.children(ref); for (var i in childs) { @@ -197,7 +197,7 @@ function traverseChilds(dotGraph, nodes, groups, parent) { group.nodes.push(node); } } - + // Groups appended to groups after group are group children. group.childs = []; for (var i = group.id + 1; i < groups.length; ++i) { @@ -232,26 +232,26 @@ function groupSize(nodes) { function frontEndGraph(dotGraph, prevGraph) { var graph = {'nodes': [], 'groups': []}; traverseChilds(dotGraph, graph.nodes, graph.groups); - + // Dictionary to access nodes by id graph.nodesd = {}; for (var i in graph.nodes) { var node = graph.nodes[i]; graph.nodesd[node.id] = node; } - + // Dictionary to access groups by id graph.groupsd = {}; for (var i in graph.groups) { var group = graph.groups[i]; graph.groupsd[group.id] = group; } - + // Parent nodes graph.nodesp = graph.nodes.filter(function(d) {return d.isParent;}); // Non-parent nodes graph.nodesn = graph.nodes.filter(function(d) {return !d.isParent;}); - + // Compute size of groups for (i in graph.groups) { var group = graph.groups[i]; @@ -288,9 +288,9 @@ function frontEndGraph(dotGraph, prevGraph) { } } } - + graph.size = graph.groups[0].size; - + // Reuse previous positions if (exists(prevGraph)) { for (var i in graph.nodes) { @@ -312,15 +312,15 @@ function frontEndGraph(dotGraph, prevGraph) { } } } - + // Edges graph.edges = []; - + for (var i in graph.nodesn) { for (var j in graph.nodesn) { var source = graph.nodesn[i]; var target = graph.nodesn[j]; - + var dotEdge = dotGraph.edge(source.value.id, target.value.id); if (exists(dotEdge)) { var edge = {}; @@ -329,9 +329,9 @@ function frontEndGraph(dotGraph, prevGraph) { edge.value = dotEdge; graph.edges.push(edge); } - + // Redirect edges to subgraph - + function redirectEdges(map, dotEdge) { for (var k in map) { var kmap = map[k]; @@ -340,7 +340,7 @@ function frontEndGraph(dotGraph, prevGraph) { edge.source = parseInt(source.index); edge.target = parseInt(target.index); edge.value = dotEdge; - graph.edges.push(edge); + graph.edges.push(edge); } } } @@ -354,7 +354,7 @@ function frontEndGraph(dotGraph, prevGraph) { redirectEdges(map, dotEdge); } } - + if (exists(source.parent)) { var parent = source.parent; var dotEdge = dotGraph.edge(parent.id, target.id); @@ -388,7 +388,7 @@ function convexHulls(graph, offset) { points.push([node.x - node.value.cx - offset, node.y - node.value.cy - offset]); points.push([node.x - node.value.cx - offset, node.y + node.value.cy + offset]); points.push([node.x + node.value.cx + offset, node.y - node.value.cy - offset]); - points.push([node.x + node.value.cx + offset, node.y + node.value.cy + offset]); + points.push([node.x + node.value.cx + offset, node.y + node.value.cy + offset]); } } for (var k in group.childs) { @@ -399,7 +399,7 @@ function convexHulls(graph, offset) { points.push([node.x - node.value.cx - offset, node.y - node.value.cy - offset]); points.push([node.x - node.value.cx - offset, node.y + node.value.cy + offset]); points.push([node.x + node.value.cx + offset, node.y - node.value.cy - offset]); - points.push([node.x + node.value.cx + offset, node.y + node.value.cy + offset]); + 
points.push([node.x + node.value.cx + offset, node.y + node.value.cy + offset]); } } } @@ -439,7 +439,7 @@ function drawGraph() { } } } - + var isEdgeOver = false; var isEdgeLabelOver = false; @@ -480,7 +480,7 @@ function drawGraph() { updateGraph(); }) .on('dragend', function(d) {forceLayout.resume();}); - + // Draw convex hull surrounding group of nodes graph.hulls = convexHulls(graph); hulls = pane.selectAll('#hulls').remove(); @@ -491,7 +491,7 @@ function drawGraph() { .attr('class', 'hull') .attr('d', drawConvexHull) .call(dragHulls); - + // Event handler to open/close groups hulls.on('dblclick', function(d) { var group = graph.groups[d.group]; @@ -505,7 +505,7 @@ function drawGraph() { graph = frontEndGraph(dotGraph, graph); drawGraph(); }); - + // Add edges edges = pane.selectAll('#edges').remove(); edges = pane.append('g').attr('id', 'edges') @@ -513,21 +513,21 @@ function drawGraph() { .attr('class', 'edge') .attr('stroke', function(d) {return d.value.color;}) .attr('marker-mid', function(d) { return 'url(#edgeArrow_' + d.value.type + ')';}); - + edges.on('mouseover', function(d) { var edge = d3.select(this); edge.transition() .duration(200) .style('opacity', 1.0); - edgeDiv.transition() - .duration(200) + edgeDiv.transition() + .duration(200) .style('opacity', .9); edgeDiv - .html(d.value.label) - .style('left', (d3.event.pageX) + 'px') - .style('top', (d3.event.pageY - 28) + 'px'); + .html(d.value.label) + .style('left', (d3.event.pageX) + 'px') + .style('top', (d3.event.pageY - 28) + 'px'); }); - + edges.on('mouseout', function(d) { var edge = d3.select(this); edge.transition() @@ -536,17 +536,17 @@ function drawGraph() { edgeDiv.transition() .duration(200) .style('opacity', 0); - + }); - + // Add nodes pane.selectAll('#nodes').remove(); nodes = pane.append('g').attr('id', 'nodes') .selectAll('g').data(graph.nodesn).enter().append('g'); - + updateNodes(); updateGraph(); - + nodes.on('dblclick', function(d) { if (d.value.hasChilds) { d.value.showChilds = !d.value.showChilds; @@ -560,7 +560,7 @@ function drawGraph() { drawGraph(); } }); - + nodes.on('mouseover', function(node) { // Highlight incoming edges edges.each(function (d, i) { @@ -573,16 +573,16 @@ function drawGraph() { }); // Show node details if (!isEditNode) { - nodeInfo.transition() - .duration(200) + nodeInfo.transition() + .duration(200) .style('opacity', .9); nodeInfo .html(formatNodeInfos(node)) - .style('left', (d3.event.pageX) + 30 + 'px') + .style('left', (d3.event.pageX) + 30 + 'px') .style('top', (d3.event.pageY - 28) + 'px'); } }); - + nodes.on('mouseout', function(node) { edges.each(function (d, i) { var edge = d3.select(this); @@ -594,9 +594,9 @@ function drawGraph() { }); hideNodeInfo(); }); - + nodes.on('contextmenu', d3.contextMenu(menuItems)); - + forceLayout = d3.layout.force() .nodes(graph.nodes) .links(graph.edges) @@ -608,7 +608,7 @@ function drawGraph() { .friction(0.5) .on('tick', updateGraph) .start(); - + // Drag behavour var drag = forceLayout.drag() .on('dragstart', function(d) { @@ -694,11 +694,11 @@ function updateGraph() { for (var i in graph.nodes) { q.visit(collide(graph.nodes[i])); } - + graph.hulls = convexHulls(graph); hulls.data(graph.hulls) .attr('d', drawConvexHull); - + // Update nodes nodes.attr('transform', function(d) { return 'translate(' + (d.x - d.value.cx) + ' ' + (d.y - d.value.cy) + ')'; }); // Update edges @@ -720,7 +720,7 @@ function updateGraph() { /* * Toggles between usual nodes colors and profiling colors - */ + */ function toggleNodeColors() { useProfileColors = 
!useProfileColors; updateNodes(); @@ -730,7 +730,7 @@ function toggleNodeColors() { /* * Computes bounding box that fits text of a certain length. - */ + */ function textSize(text, attr) { var t = svg.append('text').text(text); if (typeof(attr) != 'undefined') { @@ -815,7 +815,7 @@ function formatNodeInfos(node) { s += ' / ' + (p[0] / p[1] * 100).toFixed(1) + ' %'; s += '
<br/>
'; } - return s; + return s; } @@ -841,14 +841,14 @@ function updateNode(d, node) { .attr('height', d.value.height); } shape.attr('fill', nodeFillColor(d)); - + node.selectAll('text').remove(); var text = node.append('text') .attr('class', 'nodeText') .attr('x', pad) .attr('dy', function(d) {return d.value.height - pad - 5;}) .text(function(d) {return d.value.label;}); - + if (d.value.hasChilds) { node.style('cursor', 'pointer'); } @@ -862,7 +862,7 @@ function updateNodes() { nodes.each(function(d) { var node = d3.select(this); updateNode(d, node); - }); + }); } @@ -870,8 +870,8 @@ function updateNodes() { * Hides node information field. */ function hideNodeInfo() { - nodeInfo.transition() - .duration(200) + nodeInfo.transition() + .duration(200) .style('opacity', 0); } @@ -888,17 +888,17 @@ function setNodeSize(node) { } -/* +/* * Event handler for editing nodes. */ function editNode(elm, d) { var node = d3.select(elm); var pos = elm.getBBox(); if (d3.event.defaultPrevented) return; - + isEditNode = true; hideNodeInfo(); - + var form = node.append('foreignObject') .attr('x', pos.x) .attr('y', pos.y) diff --git a/theano/d3viz/js/dagre-d3.min.js b/aesara/d3viz/js/dagre-d3.min.js similarity index 100% rename from theano/d3viz/js/dagre-d3.min.js rename to aesara/d3viz/js/dagre-d3.min.js diff --git a/theano/d3viz/js/graphlib-dot.min.js b/aesara/d3viz/js/graphlib-dot.min.js similarity index 100% rename from theano/d3viz/js/graphlib-dot.min.js rename to aesara/d3viz/js/graphlib-dot.min.js diff --git a/theano/gpuarray/__init__.py b/aesara/gpuarray/__init__.py similarity index 92% rename from theano/gpuarray/__init__.py rename to aesara/gpuarray/__init__.py index d888a932d8..266154252c 100644 --- a/theano/gpuarray/__init__.py +++ b/aesara/gpuarray/__init__.py @@ -3,13 +3,13 @@ import sys import warnings -import theano -from theano.compile import optdb -from theano.configdefaults import config -from theano.tensor.basic import register_transfer +import aesara +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.tensor.basic import register_transfer -_logger_name = "theano.gpuarray" +_logger_name = "aesara.gpuarray" _logger = logging.getLogger(_logger_name) error = _logger.error @@ -17,14 +17,14 @@ pygpu_activated = False # Used to skip initialization checking when we are in the same processus. 
-theano_gpu_is_already_active = False +aesara_gpu_is_already_active = False try: import pygpu import pygpu.gpuarray except ImportError: pygpu = None -from theano.gpuarray import ( +from aesara.gpuarray import ( ctc, dnn, extra_ops, @@ -35,10 +35,10 @@ rng_mrg, sort, ) -from theano.gpuarray.basic_ops import as_gpuarray_variable +from aesara.gpuarray.basic_ops import as_gpuarray_variable # This is for documentation not to depend on the availability of pygpu -from theano.gpuarray.type import ( +from aesara.gpuarray.type import ( ContextNotDefined, GpuArrayConstant, GpuArraySharedVariable, @@ -81,10 +81,10 @@ def pygpu_parse_version(version_string): def init_dev(dev, name=None, preallocate=None): global pygpu_activated - global theano_gpu_is_already_active + global aesara_gpu_is_already_active if ( - not theano_gpu_is_already_active - and os.environ.get("THEANO_GPU_IS_ALREADY_ACTIVE", "") == "Yes" + not aesara_gpu_is_already_active + and os.environ.get("AESARA_GPU_IS_ALREADY_ACTIVE", "") == "Yes" ): raise RuntimeError( "You can't initialize the GPU in a subprocess if the parent process already did it" @@ -102,10 +102,10 @@ def init_dev(dev, name=None, preallocate=None): gpuarray_version_major_detected = pygpu.gpuarray.api_version()[0] if gpuarray_version_major_detected != gpuarray_version_major_supported: raise ValueError( - "Your installed version of libgpuarray is not in sync with the current Theano" + "Your installed version of libgpuarray is not in sync with the current Aesara" f" version. The installed libgpuarray version supports API version {int(gpuarray_version_major_detected)}," - f" while current Theano supports API version {int(gpuarray_version_major_supported)}. Change the version of" - " libgpuarray or Theano to fix this problem.", + f" while current Aesara supports API version {int(gpuarray_version_major_supported)}. 
Change the version of" + " libgpuarray or Aesara to fix this problem.", ) if dev not in init_dev.devmap: args = dict() @@ -123,8 +123,8 @@ def init_dev(dev, name=None, preallocate=None): single_stream=config.gpuarray__single_stream, **args, ) - os.environ["THEANO_GPU_IS_ALREADY_ACTIVE"] = "Yes" - theano_gpu_is_already_active = True + os.environ["AESARA_GPU_IS_ALREADY_ACTIVE"] = "Yes" + aesara_gpu_is_already_active = True context.dev = dev init_dev.devmap[dev] = context reg_context(name, context) @@ -251,9 +251,9 @@ def use( optdb.add_tags("gpuarray_opt", "fast_run", "fast_compile") optdb.add_tags("gpua_scanOp_make_inplace", "fast_run") if move_shared_to_gpu: - import theano.compile + import aesara.compile - theano.compile.shared_constructor(gpuarray_shared_constructor) + aesara.compile.shared_constructor(gpuarray_shared_constructor) if pygpu: diff --git a/theano/gpuarray/basic_ops.py b/aesara/gpuarray/basic_ops.py similarity index 98% rename from theano/gpuarray/basic_ops.py rename to aesara/gpuarray/basic_ops.py index afc57890ed..7fd53dbb8b 100644 --- a/theano/gpuarray/basic_ops.py +++ b/aesara/gpuarray/basic_ops.py @@ -5,22 +5,22 @@ import numpy as np -import theano -import theano.tensor as tt -from theano.configdefaults import config -from theano.gradient import grad_undefined -from theano.graph.basic import Apply, Variable -from theano.graph.op import COp, ExternalCOp, Op, _NoPythonOp -from theano.graph.opt import copy_stack_trace -from theano.graph.params_type import ParamsType -from theano.graph.type import CType -from theano.graph.utils import MethodNotDefined -from theano.link.c.interface import HideC -from theano.scalar import bool as bool_t -from theano.scalar import int32 as int32_t -from theano.tensor.basic import Alloc, AllocEmpty, Join, Split, alloc_validate_shape -from theano.tensor.shape import Reshape -from theano.tensor.type import TensorType, values_eq_approx_always_true +import aesara +import aesara.tensor as tt +from aesara.configdefaults import config +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import COp, ExternalCOp, Op, _NoPythonOp +from aesara.graph.opt import copy_stack_trace +from aesara.graph.params_type import ParamsType +from aesara.graph.type import CType +from aesara.graph.utils import MethodNotDefined +from aesara.link.c.interface import HideC +from aesara.scalar import bool as bool_t +from aesara.scalar import int32 as int32_t +from aesara.tensor.basic import Alloc, AllocEmpty, Join, Split, alloc_validate_shape +from aesara.tensor.shape import Reshape +from aesara.tensor.type import TensorType, values_eq_approx_always_true try: @@ -29,8 +29,8 @@ except ImportError: pass -from theano.gpuarray.fp16_help import write_w -from theano.gpuarray.type import ( +from aesara.gpuarray.fp16_help import write_w +from aesara.gpuarray.type import ( EQ_MAP, ContextNotDefined, GpuArrayConstant, @@ -762,7 +762,7 @@ def c_code(self, node, name, inputs, outputs, sub): %(fail)s if (%(out)s == NULL || !GpuArray_IS_C_CONTIGUOUS(&%(out)s->ga) || - !theano_size_check(%(out)s, PyArray_NDIM(%(name)s_tmp), + !aesara_size_check(%(out)s, PyArray_NDIM(%(name)s_tmp), (size_t *)PyArray_DIMS(%(name)s_tmp), get_typecode((PyObject *)PyArray_DESCR(%(name)s_tmp)))) { Py_XDECREF(%(out)s); @@ -1119,7 +1119,7 @@ def c_code(self, node, name, inp, out, sub): code.append( """ -if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(params)s->typecode, GA_C_ORDER, +if (aesara_prep_output(&%(zz)s, %(ndim)s, shape, %(params)s->typecode, 
GA_C_ORDER, %(params)s->context)) { %(fail)s } @@ -1189,7 +1189,7 @@ def c_code(self, node, name, inp, out, sub): Py_INCREF(%(z)s); } else if (NULL == %(z)s - || !theano_size_check(%(z)s, PyGpuArray_NDIM(%(input)s), PyGpuArray_DIMS(%(input)s), + || !aesara_size_check(%(z)s, PyGpuArray_NDIM(%(input)s), PyGpuArray_DIMS(%(input)s), %(input)s->ga.typecode) || !GpuArray_IS_C_CONTIGUOUS(&(%(z)s->ga))) { @@ -1674,7 +1674,7 @@ def c_code(self, node, name, inputs, outputs, sub): return main_code % locals() -@theano.compile.profiling.register_profiler_printer +@aesara.compile.profiling.register_profiler_printer def profile_printer( message, compile_time, fct_call_time, apply_time, apply_cimpl, outputs_size, file ): @@ -1717,7 +1717,7 @@ def profile_printer( ) print("", file=file) - print(" Theano function input that are float64", file=file) + print(" Aesara function input that are float64", file=file) print(" ", file=file) for fg in fgraphs: for i in fg.inputs: diff --git a/theano/gpuarray/blas.py b/aesara/gpuarray/blas.py similarity index 97% rename from theano/gpuarray/blas.py rename to aesara/gpuarray/blas.py index c476610254..f32f832619 100644 --- a/theano/gpuarray/blas.py +++ b/aesara/gpuarray/blas.py @@ -1,6 +1,6 @@ -import theano -from theano.compile import optdb -from theano.gpuarray.basic_ops import ( +import aesara +from aesara.compile import optdb +from aesara.gpuarray.basic_ops import ( CGpuKernelBase, GpuArrayType, as_gpuarray_variable, @@ -8,20 +8,20 @@ gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.opt_util import inplace_allocempty -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonCOp -from theano.graph.opt import LocalOptGroup, in2out -from theano.graph.params_type import ParamsType -from theano.scalar import bool as bool_t -from theano.tensor.basic import as_tensor_variable +from aesara.gpuarray.opt_util import inplace_allocempty +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonCOp +from aesara.graph.opt import LocalOptGroup, in2out +from aesara.graph.params_type import ParamsType +from aesara.scalar import bool as bool_t +from aesara.tensor.basic import as_tensor_variable try: import pygpu from pygpu import blas except ImportError: - # To make sure theano is importable + # To make sure aesara is importable pass @@ -67,7 +67,7 @@ def make_node(self, y, alpha, A, x, beta): # float16 not supported expected = A.dtype - assert theano.scalar.upcast(alpha.dtype, beta.dtype, expected) == expected + assert aesara.scalar.upcast(alpha.dtype, beta.dtype, expected) == expected alpha = alpha.astype(expected) beta = beta.astype(expected) return Apply(self, [y, alpha, A, x, beta], [y.type()]) @@ -97,7 +97,7 @@ def c_code(self, node, name, inp, out, sub): code = ( """ if (!%(params)s->inplace || %(y)s->ga.strides[0] <= 0) { - %(out)s = theano_try_copy(%(out)s, %(y)s); + %(out)s = aesara_try_copy(%(out)s, %(y)s); if (%(out)s == NULL) { %(fail)s } @@ -183,17 +183,17 @@ def make_node(self, C, alpha, A, B, beta): if not (A.dtype == B.dtype == C.dtype): raise TypeError( - theano.tensor.blas.Gemm.E_mixed, + aesara.tensor.blas.Gemm.E_mixed, (A.dtype, B.dtype, C.dtype, alpha.dtype, beta.dtype), ) if not A.dtype.startswith("float"): - raise TypeError(theano.tensor.blas.Gemm.E_float, (A.dtype)) + raise TypeError(aesara.tensor.blas.Gemm.E_float, (A.dtype)) if A.dtype == "float16": expected = "float32" else: expected = A.dtype - assert theano.scalar.upcast(alpha.dtype, beta.dtype, expected) == expected + assert 
aesara.scalar.upcast(alpha.dtype, beta.dtype, expected) == expected alpha = alpha.astype(expected) beta = beta.astype(expected) @@ -226,7 +226,7 @@ def c_code(self, node, name, inp, out, sub): code = ( """ if (!%(params)s->inplace || !GpuArray_ISONESEGMENT(&%(C)s->ga)) { - %(out)s = theano_try_copy(%(out)s, %(C)s); + %(out)s = aesara_try_copy(%(out)s, %(C)s); if (%(out)s == NULL) { %(fail)s } @@ -281,7 +281,7 @@ def make_node(self, A, alpha, x, y): "ger requires matching dtypes", (A.dtype, alpha.dtype, x.dtype, y.dtype) ) - assert theano.scalar.upcast(alpha.dtype, A.dtype) == A.dtype + assert aesara.scalar.upcast(alpha.dtype, A.dtype) == A.dtype alpha = alpha.astype(A.dtype) assert alpha.ndim == 0 assert A.ndim == 2 @@ -310,7 +310,7 @@ def c_code(self, node, name, inp, out, sub): code = ( """ if (!%(params)s->inplace || !GpuArray_ISONESEGMENT(&%(A)s->ga)) { - %(out)s = theano_try_copy(%(out)s, %(A)s); + %(out)s = aesara_try_copy(%(out)s, %(A)s); if (%(out)s == NULL) { %(fail)s } @@ -385,7 +385,7 @@ def c_code(self, node, name, inputs, outputs, sub): dims[0] = PyGpuArray_DIMS(%(A)s)[0]; dims[1] = PyGpuArray_DIMS(%(B)s)[1]; - if (theano_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER, + if (aesara_prep_output(&%(out)s, 2, dims, %(typecode)s, GA_C_ORDER, %(A)s->context)) { %(fail)s } @@ -463,7 +463,7 @@ def c_code(self, node, name, inp, out, sub): int err; if (%(params)s->inplace){ if (!GpuArray_ISONESEGMENT(&%(C)s->ga)) { - %(out)s = theano_try_copy(%(out)s, %(C)s); + %(out)s = aesara_try_copy(%(out)s, %(C)s); if (%(out)s == NULL) { %(fail)s } @@ -473,7 +473,7 @@ def c_code(self, node, name, inp, out, sub): Py_INCREF(%(out)s); } } else { - %(out)s = theano_try_copy(%(out)s, %(C)s); + %(out)s = aesara_try_copy(%(out)s, %(C)s); if (%(out)s == NULL) { %(fail)s } @@ -975,7 +975,7 @@ def c_code_helper( } // Prepare output array - if (theano_prep_output(&%(out)s, odim, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0) + if (aesara_prep_output(&%(out)s, odim, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0) { if (odim == 4) { PyErr_Format(PyExc_RuntimeError, @@ -1048,13 +1048,13 @@ class GpuCorrMM(BaseGpuCorrMM, _NoPythonCOp): ----- Currently, the Op requires the inputs, filters and outputs to be C-contiguous. Use :func:`gpu_contiguous - ` on these arguments + ` on these arguments if needed. - You can either enable the Theano flag `optimizer_including=conv_gemm` + You can either enable the Aesara flag `optimizer_including=conv_gemm` to automatically replace all convolution operations with `GpuCorrMM` or one of its gradients, or you can use it as a replacement for - :func:`conv2d `, called as + :func:`conv2d `, called as `GpuCorrMM(subsample=...)(image, filters)`. The latter is currently faster, but note that it computes a correlation -- if you need to compute a convolution, flip the filters as `filters[:,:,::-1,::-1]`. @@ -1133,7 +1133,7 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM, _NoPythonCOp): Notes ----- - You will not want to use this directly, but rely on Theano's automatic + You will not want to use this directly, but rely on Aesara's automatic differentiation or graph optimization to use it as needed. 
""" @@ -1222,7 +1222,7 @@ def grad(self, inp, grads): self.unshared, )(bottom, weights) d_height_width = ( - (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () + (aesara.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () ) return (d_bottom, d_top) + d_height_width @@ -1239,7 +1239,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM, _NoPythonCOp): Notes ----- - You will not want to use this directly, but rely on Theano's automatic + You will not want to use this directly, but rely on Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -1324,7 +1324,7 @@ def grad(self, inp, grads): self.unshared, )(bottom, weights) d_height_width = ( - (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () + (aesara.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () ) return (d_weights, d_top) + d_height_width @@ -1750,7 +1750,7 @@ def c_code_helper( out_dim_size[4] = (size_t)out_dim[4]; // Prepare output array - if (theano_prep_output(&%(out)s, 5, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0) + if (aesara_prep_output(&%(out)s, 5, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0) { PyErr_Format(PyExc_RuntimeError, "BaseGpuCorrMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld x %%lld", @@ -1812,13 +1812,13 @@ class GpuCorr3dMM(BaseGpuCorr3dMM, _NoPythonCOp): ----- Currently, the Op requires the inputs, filters and outputs to be C-contiguous. Use :func:`gpu_contiguous - ` on these arguments + ` on these arguments if needed. - You can either enable the Theano flag `optimizer_including=conv_gemm` + You can either enable the Aesara flag `optimizer_including=conv_gemm` to automatically replace all convolution operations with `GpuCorr3dMM` or one of its gradients, or you can use it as a replacement for - :func:`conv2d `, called as + :func:`conv2d `, called as `GpuCorr3dMM(subsample=...)(image, filters)`. The latter is currently faster, but note that it computes a correlation -- if you need to compute a convolution, flip the filters as `filters[:,:,::-1,::-1,::-1]`. @@ -1885,7 +1885,7 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM, _NoPythonCOp): Notes ----- - You will not want to use this directly, but rely on Theano's automatic + You will not want to use this directly, but rely on Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -1957,7 +1957,7 @@ def grad(self, inp, grads): self.border_mode, self.subsample, self.filter_dilation, self.num_groups )(bottom, weights) d_height_width_depth = ( - (theano.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () + (aesara.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () ) return (d_bottom, d_top) + d_height_width_depth @@ -1974,7 +1974,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM, _NoPythonCOp): Notes ----- - You will not want to use this directly, but rely on Theano's automatic + You will not want to use this directly, but rely on Aesara's automatic differentiation or graph optimization to use it as needed. 
""" @@ -2048,7 +2048,7 @@ def grad(self, inp, grads): self.border_mode, self.subsample, self.filter_dilation, self.num_groups )(bottom, weights) d_height_width_depth = ( - (theano.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () + (aesara.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () ) return (d_weights, d_top) + d_height_width_depth diff --git a/theano/gpuarray/blocksparse.py b/aesara/gpuarray/blocksparse.py similarity index 89% rename from theano/gpuarray/blocksparse.py rename to aesara/gpuarray/blocksparse.py index 7e2ac10264..ffc978eff0 100644 --- a/theano/gpuarray/blocksparse.py +++ b/aesara/gpuarray/blocksparse.py @@ -2,23 +2,23 @@ import numpy as np -from theano import tensor as tt -from theano.gpuarray.basic_ops import ( +from aesara import tensor as tt +from aesara.gpuarray.basic_ops import ( as_gpuarray_variable, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.type import gpu_context_type -from theano.gradient import grad_undefined -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonExternalCOp -from theano.graph.params_type import ParamsType -from theano.scalar import bool as bool_t -from theano.tensor import as_tensor_variable -from theano.tensor.type import discrete_dtypes +from aesara.gpuarray.type import gpu_context_type +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonExternalCOp +from aesara.graph.params_type import ParamsType +from aesara.scalar import bool as bool_t +from aesara.tensor import as_tensor_variable +from aesara.tensor.type import discrete_dtypes -_logger = logging.getLogger("theano.gpuarray.blocksparse") +_logger = logging.getLogger("aesara.gpuarray.blocksparse") class GpuSparseBlockGemv(_NoPythonExternalCOp): diff --git a/theano/gpuarray/c_code/blockgemv.c b/aesara/gpuarray/c_code/blockgemv.c similarity index 99% rename from theano/gpuarray/c_code/blockgemv.c rename to aesara/gpuarray/c_code/blockgemv.c index 1f4e5bf495..bc475ee1af 100644 --- a/theano/gpuarray/c_code/blockgemv.c +++ b/aesara/gpuarray/c_code/blockgemv.c @@ -11,7 +11,7 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, out = o; Py_INCREF(out); } else { - out = theano_try_copy(out, o); + out = aesara_try_copy(out, o); if (out == NULL) { // Error already set return -1; @@ -112,7 +112,7 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, } else { err = GA_INVALID_ERROR; } - + free(W_list); free(offW); free(inp_list); @@ -126,4 +126,3 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, *_out = out; return 0; } - diff --git a/theano/gpuarray/c_code/blockger.c b/aesara/gpuarray/c_code/blockger.c similarity index 99% rename from theano/gpuarray/c_code/blockger.c rename to aesara/gpuarray/c_code/blockger.c index 89248072c4..fd603375f1 100644 --- a/theano/gpuarray/c_code/blockger.c +++ b/aesara/gpuarray/c_code/blockger.c @@ -25,7 +25,7 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, out = o; Py_INCREF(out); } else { - out = theano_try_copy(out, o); + out = aesara_try_copy(out, o); if (out == NULL) return -1; } @@ -113,5 +113,3 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, *_out = out; return 0; } - - diff --git a/theano/gpuarray/c_code/conv_desc.c b/aesara/gpuarray/c_code/conv_desc.c similarity index 100% rename from theano/gpuarray/c_code/conv_desc.c rename to aesara/gpuarray/c_code/conv_desc.c diff --git 
a/theano/gpuarray/c_code/corr3d_gemm.c b/aesara/gpuarray/c_code/corr3d_gemm.c similarity index 99% rename from theano/gpuarray/c_code/corr3d_gemm.c rename to aesara/gpuarray/c_code/corr3d_gemm.c index 8726a58b03..fa3079ef57 100644 --- a/theano/gpuarray/c_code/corr3d_gemm.c +++ b/aesara/gpuarray/c_code/corr3d_gemm.c @@ -400,7 +400,7 @@ int col2im3d(GpuArray *data_col, const size_t channels, } -// Theano op code +// Aesara op code // Authors: Arjun Jain, Frederic Bastien, Jan Schluter // Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu diff --git a/theano/gpuarray/c_code/corr_gemm.c b/aesara/gpuarray/c_code/corr_gemm.c similarity index 99% rename from theano/gpuarray/c_code/corr_gemm.c rename to aesara/gpuarray/c_code/corr_gemm.c index 7c530c4dfe..7f0bca5ccd 100644 --- a/theano/gpuarray/c_code/corr_gemm.c +++ b/aesara/gpuarray/c_code/corr_gemm.c @@ -341,7 +341,7 @@ int col2im(GpuArray *data_col, const size_t channels, } -// Theano op code +// Aesara op code // Authors: Arjun Jain, Frederic Bastien, Jan Schluter // Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu @@ -396,7 +396,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, PyGpuArray_STRIDES(weight)[5]); return NULL; } - else { + else { PyErr_Format(PyExc_ValueError, "GpuCorrMM requires weight to be C-contiguous, " "but strides are: %ld %ld %ld %ld\n", @@ -653,7 +653,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, } } else{ - for(size_t g = 0; g < numgroups; g++){ + for(size_t g = 0; g < numgroups; g++){ err = rgemm(cb_fortran, cb_trans, cb_no_trans, K_, M_, N_, 1, &col->ga, g * group_col_stride, N_, diff --git a/theano/gpuarray/c_code/ctc_wrapper.c b/aesara/gpuarray/c_code/ctc_wrapper.c similarity index 98% rename from theano/gpuarray/c_code/ctc_wrapper.c rename to aesara/gpuarray/c_code/ctc_wrapper.c index bfb2494fa8..1e819d9f3f 100644 --- a/theano/gpuarray/c_code/ctc_wrapper.c +++ b/aesara/gpuarray/c_code/ctc_wrapper.c @@ -180,7 +180,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, return 1; } - if ( theano_prep_output( out_costs, 1, &cost_size, in_activations->ga.typecode, + if ( aesara_prep_output( out_costs, 1, &cost_size, in_activations->ga.typecode, GA_C_ORDER, gpu_context ) != 0 ) { ctc_context_destroy( context ); @@ -196,7 +196,7 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations, if ( NULL != out_gradients ) // if gradient computation is not disabled { - if ( theano_prep_output( out_gradients, 3, grad_dims, in_activations->ga.typecode, + if ( aesara_prep_output( out_gradients, 3, grad_dims, in_activations->ga.typecode, GA_C_ORDER, gpu_context ) != 0 ) { ctc_context_destroy( context ); diff --git a/theano/gpuarray/c_code/cudnn_helper.h b/aesara/gpuarray/c_code/cudnn_helper.h similarity index 100% rename from theano/gpuarray/c_code/cudnn_helper.h rename to aesara/gpuarray/c_code/cudnn_helper.h diff --git a/theano/gpuarray/c_code/dimshuffle.c b/aesara/gpuarray/c_code/dimshuffle.c similarity index 97% rename from theano/gpuarray/c_code/dimshuffle.c rename to aesara/gpuarray/c_code/dimshuffle.c index 765d04577b..adbf2cea1e 100644 --- a/theano/gpuarray/c_code/dimshuffle.c +++ b/aesara/gpuarray/c_code/dimshuffle.c @@ -28,7 +28,7 @@ int APPLY_SPECIFIC(gpu_dimshuffle)(PyGpuArrayObject* input, PyGpuArrayObject** o new_order = (npy_int64*) 
PyArray_DATA(params->_new_order); /* Type of params->transposition (npy_uint32) should be an alias of unsigned int - * on platforms supported by Theano. */ + * on platforms supported by Aesara. */ transposition = (unsigned int*) PyArray_DATA(params->transposition); sh = (size_t*) malloc(nd_out * sizeof(size_t)); if (sh == NULL) { diff --git a/theano/gpuarray/c_code/dnn_base.c b/aesara/gpuarray/c_code/dnn_base.c similarity index 100% rename from theano/gpuarray/c_code/dnn_base.c rename to aesara/gpuarray/c_code/dnn_base.c diff --git a/theano/gpuarray/c_code/dnn_batchnorm.c b/aesara/gpuarray/c_code/dnn_batchnorm.c similarity index 91% rename from theano/gpuarray/c_code/dnn_batchnorm.c rename to aesara/gpuarray/c_code/dnn_batchnorm.c index 2d4b15d569..951f926732 100644 --- a/theano/gpuarray/c_code/dnn_batchnorm.c +++ b/aesara/gpuarray/c_code/dnn_batchnorm.c @@ -30,13 +30,13 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, Py_XDECREF(*outp); *outp = inp; Py_INCREF(*outp); - } else if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) { + } else if (aesara_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) { return 1; } - if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) return 1; - if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) return 1; if (c_set_tensorNd(*outp, bn_output) != 0) @@ -51,7 +51,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, Py_INCREF(running_mean); } else { running_mean = *out_running_mean; - running_mean = theano_try_copy(running_mean, in_running_mean); + running_mean = aesara_try_copy(running_mean, in_running_mean); if (running_mean == NULL) { return 1; } @@ -62,7 +62,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, Py_INCREF(running_var); } else { running_var = *out_running_var; - running_var = theano_try_copy(running_var, in_running_var); + running_var = aesara_try_copy(running_var, in_running_var); if (running_var == NULL) { return 1; } diff --git a/theano/gpuarray/c_code/dnn_batchnorm_base.c b/aesara/gpuarray/c_code/dnn_batchnorm_base.c similarity index 100% rename from theano/gpuarray/c_code/dnn_batchnorm_base.c rename to aesara/gpuarray/c_code/dnn_batchnorm_base.c diff --git a/theano/gpuarray/c_code/dnn_batchnorm_grad.c b/aesara/gpuarray/c_code/dnn_batchnorm_grad.c similarity index 93% rename from theano/gpuarray/c_code/dnn_batchnorm_grad.c rename to aesara/gpuarray/c_code/dnn_batchnorm_grad.c index 92508c7b37..f7a4cb40cb 100644 --- a/theano/gpuarray/c_code/dnn_batchnorm_grad.c +++ b/aesara/gpuarray/c_code/dnn_batchnorm_grad.c @@ -39,11 +39,11 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, return 1; } - if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) return 1; - if (theano_prep_output(dscale, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(dscale, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) return 1; - if 
(theano_prep_output(dbias, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(dbias, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) return 1; if (c_set_tensorNd(*dinp, bn_output) != 0) diff --git a/theano/gpuarray/c_code/dnn_batchnorm_inf.c b/aesara/gpuarray/c_code/dnn_batchnorm_inf.c similarity index 96% rename from theano/gpuarray/c_code/dnn_batchnorm_inf.c rename to aesara/gpuarray/c_code/dnn_batchnorm_inf.c index f22b4bc479..81b871e9a8 100644 --- a/theano/gpuarray/c_code/dnn_batchnorm_inf.c +++ b/aesara/gpuarray/c_code/dnn_batchnorm_inf.c @@ -2,7 +2,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, PyGpuArrayObject *bias, PyGpuArrayObject *est_mean, - PyGpuArrayObject *est_var, npy_float64 epsilon, + PyGpuArrayObject *est_var, npy_float64 epsilon, PyGpuArrayObject **outp, PARAMS_TYPE* params) { PyGpuContextObject *c = inp->context; @@ -21,7 +21,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, *outp = inp; Py_INCREF(*outp); } else { - if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) return 1; } diff --git a/theano/gpuarray/c_code/dnn_conv_base.c b/aesara/gpuarray/c_code/dnn_conv_base.c similarity index 99% rename from theano/gpuarray/c_code/dnn_conv_base.c rename to aesara/gpuarray/c_code/dnn_conv_base.c index fb3fbf8d76..63862aeb51 100644 --- a/theano/gpuarray/c_code/dnn_conv_base.c +++ b/aesara/gpuarray/c_code/dnn_conv_base.c @@ -97,7 +97,7 @@ const char* const _cppver = "No timing available: C++11 or later is required."; #include const char* const _cppver = NULL; -struct TheanoTimer { +struct AesaraTimer { double milliseconds; std::chrono::steady_clock::time_point base; void start() {base = std::chrono::steady_clock::now();} diff --git a/theano/gpuarray/c_code/dnn_dropout_desc.c b/aesara/gpuarray/c_code/dnn_dropout_desc.c similarity index 100% rename from theano/gpuarray/c_code/dnn_dropout_desc.c rename to aesara/gpuarray/c_code/dnn_dropout_desc.c diff --git a/theano/gpuarray/c_code/dnn_dropout_fwd.c b/aesara/gpuarray/c_code/dnn_dropout_fwd.c similarity index 96% rename from theano/gpuarray/c_code/dnn_dropout_fwd.c rename to aesara/gpuarray/c_code/dnn_dropout_fwd.c index eebc3bddff..9c4656067c 100644 --- a/theano/gpuarray/c_code/dnn_dropout_fwd.c +++ b/aesara/gpuarray/c_code/dnn_dropout_fwd.c @@ -17,7 +17,7 @@ int dnn_dropout_fwd(PyGpuArrayObject *x, if (c_make_tensorNd(x, &xdesc)) return -1; - if (theano_prep_output(y, x->ga.nd, x->ga.dimensions, x->ga.typecode, + if (aesara_prep_output(y, x->ga.nd, x->ga.dimensions, x->ga.typecode, GA_C_ORDER, c)) { cudnnDestroyTensorDescriptor(xdesc); return -1; diff --git a/theano/gpuarray/c_code/dnn_fwd.c b/aesara/gpuarray/c_code/dnn_fwd.c similarity index 98% rename from theano/gpuarray/c_code/dnn_fwd.c rename to aesara/gpuarray/c_code/dnn_fwd.c index 766ea5c416..47934875fa 100644 --- a/theano/gpuarray/c_code/dnn_fwd.c +++ b/aesara/gpuarray/c_code/dnn_fwd.c @@ -23,7 +23,7 @@ int reuse_algo; AlgoRec prev_algo; std::string hash_prefix; -#define THEANO_DONT_MEMSET_STRUCT +#define AESARA_DONT_MEMSET_STRUCT #ifdef DEBUG char algorithm_name[128]; @@ -52,7 +52,7 @@ int dnn_conv_fwd_fallback(cudnnConvolutionFwdAlgo_t* _algo, algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING)) { #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) + if (0 
!= aesara_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) return 1; fprintf(stderr, "(%s unsupported for 3D: fallback to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM)\n", algorithm_name); #endif @@ -145,7 +145,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, if (_cppver) fprintf(stderr, "%s\n", _cppver); #endif #ifdef DEBUG_TIMING - TheanoTimer timer; + AesaraTimer timer; #endif if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1] * params->num_groups) { @@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, *output = om; Py_INCREF(*output); } else { - if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), + if (aesara_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), om->ga.typecode, GA_C_ORDER, c) != 0) return 1; if (beta != 0.0 && pygpu_move(*output, om)) @@ -361,7 +361,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, if (err == CUDNN_STATUS_NOT_SUPPORTED) { // Fallback to none algo if not supported #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { + if (0 != aesara_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } @@ -389,7 +389,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, if (params->choose_algo) { #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { + if (0 != aesara_enum_to_string_cudnnConvolutionFwdAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } @@ -498,5 +498,3 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, #endif return 0; } - - diff --git a/theano/gpuarray/c_code/dnn_gi.c b/aesara/gpuarray/c_code/dnn_gi.c similarity index 98% rename from theano/gpuarray/c_code/dnn_gi.c rename to aesara/gpuarray/c_code/dnn_gi.c index 488fdf94ca..8c1762fbe8 100644 --- a/theano/gpuarray/c_code/dnn_gi.c +++ b/aesara/gpuarray/c_code/dnn_gi.c @@ -23,7 +23,7 @@ int reuse_algo; AlgoRec prev_algo; std::string hash_prefix; -#define THEANO_DONT_MEMSET_STRUCT +#define AESARA_DONT_MEMSET_STRUCT #ifdef DEBUG char algorithm_name[128]; @@ -110,7 +110,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, if (_cppver) fprintf(stderr, "%s\n", _cppver); #endif #ifdef DEBUG_TIMING - TheanoTimer timer; + AesaraTimer timer; #endif if (PyGpuArray_DIMS(im)[1] != PyGpuArray_DIMS(kerns)[1] * params->num_groups) { @@ -144,7 +144,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, *input = im; Py_INCREF(*input); } else { - if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im), + if (aesara_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im), im->ga.typecode, GA_C_ORDER, c) != 0) return 1; if (beta != 0.0 && pygpu_move(*input, im)) @@ -323,7 +323,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, if (err == CUDNN_STATUS_NOT_SUPPORTED) { // Fallback to none algo if not supported #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { + if (0 != aesara_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } @@ -347,7 +347,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, if (params->choose_algo) { #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { + if (0 != 
aesara_enum_to_string_cudnnConvolutionBwdDataAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } diff --git a/theano/gpuarray/c_code/dnn_gw.c b/aesara/gpuarray/c_code/dnn_gw.c similarity index 98% rename from theano/gpuarray/c_code/dnn_gw.c rename to aesara/gpuarray/c_code/dnn_gw.c index 29ccc5ee77..d36846a838 100644 --- a/theano/gpuarray/c_code/dnn_gw.c +++ b/aesara/gpuarray/c_code/dnn_gw.c @@ -23,7 +23,7 @@ int reuse_algo; AlgoRec prev_algo; std::string hash_prefix; -#define THEANO_DONT_MEMSET_STRUCT +#define AESARA_DONT_MEMSET_STRUCT #ifdef DEBUG char algorithm_name[128]; @@ -97,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, if (_cppver) fprintf(stderr, "%s\n", _cppver); #endif #ifdef DEBUG_TIMING - TheanoTimer timer; + AesaraTimer timer; #endif if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(km)[1] * params->num_groups) { @@ -131,7 +131,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, *kerns = km; Py_INCREF(*kerns); } else { - if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km), + if (aesara_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km), km->ga.typecode, GA_C_ORDER, c) != 0) return 1; if (beta != 0.0 && pygpu_move(*kerns, km)) @@ -313,7 +313,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, if (err == CUDNN_STATUS_NOT_SUPPORTED) { // Fallback to none algo if not supported #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { + if (0 != aesara_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } @@ -337,7 +337,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, if (params->choose_algo) { #ifdef DEBUG - if (0 != theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { + if (0 != aesara_enum_to_string_cudnnConvolutionBwdFilterAlgo_t(algo, algorithm_name)) { cuda_exit(c->ctx); return 1; } diff --git a/theano/gpuarray/c_code/dnn_pool.c b/aesara/gpuarray/c_code/dnn_pool.c similarity index 98% rename from theano/gpuarray/c_code/dnn_pool.c rename to aesara/gpuarray/c_code/dnn_pool.c index ed448be06b..cab099f124 100644 --- a/theano/gpuarray/c_code/dnn_pool.c +++ b/aesara/gpuarray/c_code/dnn_pool.c @@ -75,7 +75,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, if (ndims == 3) dims[4] = (PyGpuArray_DIM(img, 4) + (p[2]*2) - w[2]) / s[2] + 1; - if (theano_prep_output(out, ndims+2, dims, img->ga.typecode, + if (aesara_prep_output(out, ndims+2, dims, img->ga.typecode, GA_C_ORDER, c) != 0) return 1; diff --git a/theano/gpuarray/c_code/dnn_pool_grad.c b/aesara/gpuarray/c_code/dnn_pool_grad.c similarity index 99% rename from theano/gpuarray/c_code/dnn_pool_grad.c rename to aesara/gpuarray/c_code/dnn_pool_grad.c index 66c4535bc7..3dc43b992b 100644 --- a/theano/gpuarray/c_code/dnn_pool_grad.c +++ b/aesara/gpuarray/c_code/dnn_pool_grad.c @@ -83,7 +83,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, return 1; } - if (theano_prep_output(inp_grad, PyGpuArray_NDIM(inp), + if (aesara_prep_output(inp_grad, PyGpuArray_NDIM(inp), PyGpuArray_DIMS(inp), inp->ga.typecode, GA_C_ORDER, c) != 0) { return 1; diff --git a/theano/gpuarray/c_code/dnn_redux.c b/aesara/gpuarray/c_code/dnn_redux.c similarity index 98% rename from theano/gpuarray/c_code/dnn_redux.c rename to aesara/gpuarray/c_code/dnn_redux.c index 2e039ebee2..1887f268d6 100644 --- a/theano/gpuarray/c_code/dnn_redux.c +++ 
b/aesara/gpuarray/c_code/dnn_redux.c @@ -88,7 +88,7 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input, } if (indices != NULL) { - if (theano_prep_output(indices, p, dims, GA_UINT, GA_C_ORDER, c) != 0) + if (aesara_prep_output(indices, p, dims, GA_UINT, GA_C_ORDER, c) != 0) return 1; indsize = PyGpuArray_SIZE(*indices) * 4; } @@ -159,7 +159,7 @@ int APPLY_SPECIFIC(dnn_redux)(PyGpuArrayObject *input, return 0; } - if (theano_prep_output(output, p, dims, input->ga.typecode, + if (aesara_prep_output(output, p, dims, input->ga.typecode, GA_C_ORDER, c) != 0) return 1; diff --git a/theano/gpuarray/c_code/dnn_rnn_desc.c b/aesara/gpuarray/c_code/dnn_rnn_desc.c similarity index 100% rename from theano/gpuarray/c_code/dnn_rnn_desc.c rename to aesara/gpuarray/c_code/dnn_rnn_desc.c diff --git a/theano/gpuarray/c_code/dnn_rnn_fwd.c b/aesara/gpuarray/c_code/dnn_rnn_fwd.c similarity index 97% rename from theano/gpuarray/c_code/dnn_rnn_fwd.c rename to aesara/gpuarray/c_code/dnn_rnn_fwd.c index 9de485588b..012529ee83 100644 --- a/theano/gpuarray/c_code/dnn_rnn_fwd.c +++ b/aesara/gpuarray/c_code/dnn_rnn_fwd.c @@ -85,7 +85,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs, shape[0] = seqLength; shape[1] = miniBatch; shape[2] = hiddenSize * numDirs; - if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0) goto fail; err = cudnnCreateTensorDescriptor(&ydesc); @@ -112,7 +112,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs, goto fail; } - if (theano_prep_output(hy, 3, PyGpuArray_DIMS(hx), + if (aesara_prep_output(hy, 3, PyGpuArray_DIMS(hx), hx->ga.typecode, GA_C_ORDER, c) != 0) goto fail; @@ -120,7 +120,7 @@ int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, uint32_t numDirs, goto fail; if (cy != NULL) { - if (theano_prep_output(cy, 3, PyGpuArray_DIMS(cx), + if (aesara_prep_output(cy, 3, PyGpuArray_DIMS(cx), cx->ga.typecode, GA_C_ORDER, c) != 0) goto fail; diff --git a/theano/gpuarray/c_code/dnn_rnn_gi.c b/aesara/gpuarray/c_code/dnn_rnn_gi.c similarity index 97% rename from theano/gpuarray/c_code/dnn_rnn_gi.c rename to aesara/gpuarray/c_code/dnn_rnn_gi.c index 718ffbe3a7..c6c9d450a2 100644 --- a/theano/gpuarray/c_code/dnn_rnn_gi.c +++ b/aesara/gpuarray/c_code/dnn_rnn_gi.c @@ -80,7 +80,7 @@ int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp, if (dcy != NULL) if (c_make_tensorNd(dcy, &dcydesc) != 0) goto fail; - + if (c_make_filter(w, &wdesc) != 0) goto fail; @@ -94,7 +94,7 @@ int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp, shape[0] = seqLength; shape[1] = miniBatch; shape[2] = inputSize; - if (theano_prep_output(dx, 3, shape, y->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(dx, 3, shape, y->ga.typecode, GA_C_ORDER, c) != 0) goto fail; err = cudnnCreateTensorDescriptor(&dxdesc); @@ -121,7 +121,7 @@ int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp, goto fail; } - if (theano_prep_output(dhx, 3, PyGpuArray_DIMS(hx), hx->ga.typecode, + if (aesara_prep_output(dhx, 3, PyGpuArray_DIMS(hx), hx->ga.typecode, GA_C_ORDER, c) != 0) goto fail; @@ -129,7 +129,7 @@ int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp, goto fail; if (cx != NULL) { - if (theano_prep_output(dcx, 3, PyGpuArray_DIMS(cx), cx->ga.typecode, + if (aesara_prep_output(dcx, 3, PyGpuArray_DIMS(cx), cx->ga.typecode, GA_C_ORDER, c) != 0) goto fail; @@ -145,7 +145,7 @@ int dnn_rnn_gi(cudnnRNNDescriptor_t desc, npy_uint64 xshp, for (size_t i = 0; i < seqLength; i++) yl[i] = ydesc; - + dxl = 
(cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength); if (dxl == NULL) { PyErr_NoMemory(); diff --git a/theano/gpuarray/c_code/dnn_rnn_gw.c b/aesara/gpuarray/c_code/dnn_rnn_gw.c similarity index 98% rename from theano/gpuarray/c_code/dnn_rnn_gw.c rename to aesara/gpuarray/c_code/dnn_rnn_gw.c index 0d004e5cbb..62fc8648e5 100644 --- a/theano/gpuarray/c_code/dnn_rnn_gw.c +++ b/aesara/gpuarray/c_code/dnn_rnn_gw.c @@ -91,7 +91,7 @@ int dnn_rnn_gw(cudnnRNNDescriptor_t desc, npy_uint64 _wsize, goto fail; } - if (theano_prep_output(dw, 1, &wsize, x->ga.typecode, GA_C_ORDER, c) != 0) + if (aesara_prep_output(dw, 1, &wsize, x->ga.typecode, GA_C_ORDER, c) != 0) goto fail; GpuArray_memset(&(*dw)->ga, 0); diff --git a/theano/gpuarray/c_code/dnn_rnn_paramsize.c b/aesara/gpuarray/c_code/dnn_rnn_paramsize.c similarity index 100% rename from theano/gpuarray/c_code/dnn_rnn_paramsize.c rename to aesara/gpuarray/c_code/dnn_rnn_paramsize.c diff --git a/theano/gpuarray/c_code/dnn_softmax.c b/aesara/gpuarray/c_code/dnn_softmax.c similarity index 98% rename from theano/gpuarray/c_code/dnn_softmax.c rename to aesara/gpuarray/c_code/dnn_softmax.c index b4c1ceae3b..23c772576b 100644 --- a/theano/gpuarray/c_code/dnn_softmax.c +++ b/aesara/gpuarray/c_code/dnn_softmax.c @@ -39,7 +39,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x, PyGpuContextObject *c = x->context; cudnnStatus_t err; - if (theano_prep_output(out, PyGpuArray_NDIM(x), + if (aesara_prep_output(out, PyGpuArray_NDIM(x), PyGpuArray_DIMS(x), x->ga.typecode, GA_C_ORDER, c) != 0) return 1; diff --git a/theano/gpuarray/c_code/dnn_softmax_grad.c b/aesara/gpuarray/c_code/dnn_softmax_grad.c similarity index 98% rename from theano/gpuarray/c_code/dnn_softmax_grad.c rename to aesara/gpuarray/c_code/dnn_softmax_grad.c index 17a9ccb585..8f4d76c2f5 100644 --- a/theano/gpuarray/c_code/dnn_softmax_grad.c +++ b/aesara/gpuarray/c_code/dnn_softmax_grad.c @@ -50,7 +50,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy, PyGpuContextObject *c = dy->context; cudnnStatus_t err; - if (theano_prep_output(dx, PyGpuArray_NDIM(dy), + if (aesara_prep_output(dx, PyGpuArray_NDIM(dy), PyGpuArray_DIMS(dy), dy->ga.typecode, GA_C_ORDER, c) != 0) return 1; diff --git a/theano/gpuarray/c_code/dnn_sptf_gi.c b/aesara/gpuarray/c_code/dnn_sptf_gi.c similarity index 98% rename from theano/gpuarray/c_code/dnn_sptf_gi.c rename to aesara/gpuarray/c_code/dnn_sptf_gi.c index e85fea7c22..6e63fe6b3f 100644 --- a/theano/gpuarray/c_code/dnn_sptf_gi.c +++ b/aesara/gpuarray/c_code/dnn_sptf_gi.c @@ -14,7 +14,7 @@ APPLY_SPECIFIC(dydesc) = NULL; { cudnnStatus_t err = CUDNN_STATUS_SUCCESS; - + err = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf)); if (err != CUDNN_STATUS_SUCCESS) { @@ -117,12 +117,12 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input, return 1; } - if ( theano_prep_output( input_grad, PyGpuArray_NDIM( input ), + if ( aesara_prep_output( input_grad, PyGpuArray_NDIM( input ), PyGpuArray_DIMS( input ), input->ga.typecode, GA_C_ORDER, gpu_ctx ) != 0 ) return 1; - if ( theano_prep_output( grid_grad, PyGpuArray_NDIM( grid ), + if ( aesara_prep_output( grid_grad, PyGpuArray_NDIM( grid ), PyGpuArray_DIMS( grid ), grid->ga.typecode, GA_C_ORDER, gpu_ctx ) != 0 ) return 1; diff --git a/theano/gpuarray/c_code/dnn_sptf_grid.c b/aesara/gpuarray/c_code/dnn_sptf_grid.c similarity index 98% rename from theano/gpuarray/c_code/dnn_sptf_grid.c rename to aesara/gpuarray/c_code/dnn_sptf_grid.c index dafc86fd36..4544d3da49 100644 --- 
a/theano/gpuarray/c_code/dnn_sptf_grid.c +++ b/aesara/gpuarray/c_code/dnn_sptf_grid.c @@ -99,7 +99,7 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta, return 1; } - if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode, + if ( aesara_prep_output( grid, 4, grid_dims, theta->ga.typecode, GA_C_ORDER, gpu_ctx ) != 0 ) { PyErr_SetString( PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/dnn_sptf_gt.c b/aesara/gpuarray/c_code/dnn_sptf_gt.c similarity index 98% rename from theano/gpuarray/c_code/dnn_sptf_gt.c rename to aesara/gpuarray/c_code/dnn_sptf_gt.c index c44532ca7c..5aff42d7ee 100644 --- a/theano/gpuarray/c_code/dnn_sptf_gt.c +++ b/aesara/gpuarray/c_code/dnn_sptf_gt.c @@ -61,7 +61,7 @@ APPLY_SPECIFIC(dnn_sptf_gt)(PyGpuArrayObject * dgrid, dtheta_dims[1] = 2; dtheta_dims[2] = 3; - if ( theano_prep_output( dtheta, 3, dtheta_dims, dgrid->ga.typecode, + if ( aesara_prep_output( dtheta, 3, dtheta_dims, dgrid->ga.typecode, GA_C_ORDER, gpu_ctx ) != 0 ) return 1; diff --git a/theano/gpuarray/c_code/dnn_sptf_sampler.c b/aesara/gpuarray/c_code/dnn_sptf_sampler.c similarity index 98% rename from theano/gpuarray/c_code/dnn_sptf_sampler.c rename to aesara/gpuarray/c_code/dnn_sptf_sampler.c index 507a52483e..4e75cc8098 100644 --- a/theano/gpuarray/c_code/dnn_sptf_sampler.c +++ b/aesara/gpuarray/c_code/dnn_sptf_sampler.c @@ -110,7 +110,7 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, return 1; } - if ( theano_prep_output( output, 4, out_dims, input->ga.typecode, + if ( aesara_prep_output( output, 4, out_dims, input->ga.typecode, GA_C_ORDER, gpu_ctx ) != 0 ) { PyErr_SetString( PyExc_MemoryError, @@ -162,4 +162,3 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, return 0; } - diff --git a/theano/gpuarray/c_code/gpuarray_helper.h b/aesara/gpuarray/c_code/gpuarray_helper.h similarity index 79% rename from theano/gpuarray/c_code/gpuarray_helper.h rename to aesara/gpuarray/c_code/gpuarray_helper.h index 9e897b4a73..5af8b4a46d 100644 --- a/theano/gpuarray/c_code/gpuarray_helper.h +++ b/aesara/gpuarray/c_code/gpuarray_helper.h @@ -1,5 +1,5 @@ -#ifndef THEANO_GPUARRAY_HELPER -#define THEANO_GPUARRAY_HELPER +#ifndef AESARA_GPUARRAY_HELPER +#define AESARA_GPUARRAY_HELPER #include #include @@ -7,17 +7,17 @@ #include -static int theano_size_check(PyGpuArrayObject *a, unsigned int nd, +static int aesara_size_check(PyGpuArrayObject *a, unsigned int nd, const size_t *dims, int typecode) { return (a->ga.nd == nd && a->ga.typecode == typecode && memcmp(a->ga.dimensions, dims, nd * sizeof(size_t)) == 0); } -static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd, +static int aesara_prep_output(PyGpuArrayObject **out, unsigned int nd, const size_t *dims, int typecode, ga_order ord, PyGpuContextObject *c) { if (*out != NULL && - theano_size_check(*out, nd, dims, typecode)) { + aesara_size_check(*out, nd, dims, typecode)) { return 0; } @@ -26,11 +26,11 @@ static int theano_prep_output(PyGpuArrayObject **out, unsigned int nd, return (*out == NULL) ? 
1 : 0; } -static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out, +static PyGpuArrayObject *aesara_try_copy(PyGpuArrayObject *out, PyGpuArrayObject *V) { if (out && GpuArray_CHKFLAGS(&out->ga, GA_CARRAY) && - theano_size_check(out, PyGpuArray_NDIM(V), + aesara_size_check(out, PyGpuArray_NDIM(V), PyGpuArray_DIMS(V), V->ga.typecode)) { if (pygpu_move(out, V)) { diff --git a/theano/gpuarray/c_code/magma_cholesky.c b/aesara/gpuarray/c_code/magma_cholesky.c similarity index 99% rename from theano/gpuarray/c_code/magma_cholesky.c rename to aesara/gpuarray/c_code/magma_cholesky.c index 37db29cf71..f2f66f01c4 100644 --- a/theano/gpuarray/c_code/magma_cholesky.c +++ b/aesara/gpuarray/c_code/magma_cholesky.c @@ -76,7 +76,7 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, *L = A; Py_INCREF(*L); } else { - *L = theano_try_copy(*L, A); + *L = aesara_try_copy(*L, A); if (*L == NULL) { PyErr_SetString( PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/magma_eigh.c b/aesara/gpuarray/c_code/magma_eigh.c similarity index 97% rename from theano/gpuarray/c_code/magma_eigh.c rename to aesara/gpuarray/c_code/magma_eigh.c index c97e579469..d4d0e31994 100644 --- a/theano/gpuarray/c_code/magma_eigh.c +++ b/aesara/gpuarray/c_code/magma_eigh.c @@ -105,7 +105,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, } d_dims[0] = N; - if (theano_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ + if (aesara_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ PyErr_SetString(PyExc_RuntimeError, "GpuMagmaEigh: failed to allocate memory for the output"); goto fail; @@ -114,7 +114,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, cudaMemcpyDeviceToDevice); if (params->compute_v) { - *V = theano_try_copy(*V, A); + *V = aesara_try_copy(*V, A); if (*V == NULL) { PyErr_SetString(PyExc_RuntimeError, "GpuMagmaEigh: failed to allocate memory for the output"); diff --git a/theano/gpuarray/c_code/magma_inv.c b/aesara/gpuarray/c_code/magma_inv.c similarity index 98% rename from theano/gpuarray/c_code/magma_inv.c rename to aesara/gpuarray/c_code/magma_inv.c index 5cb473a2ba..a3f0f32763 100644 --- a/theano/gpuarray/c_code/magma_inv.c +++ b/aesara/gpuarray/c_code/magma_inv.c @@ -42,7 +42,7 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv, *A_inv = A; Py_INCREF(*A_inv); } else { - *A_inv = theano_try_copy(*A_inv, A); + *A_inv = aesara_try_copy(*A_inv, A); if (*A_inv == NULL) { PyErr_SetString( PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/magma_qr.c b/aesara/gpuarray/c_code/magma_qr.c similarity index 100% rename from theano/gpuarray/c_code/magma_qr.c rename to aesara/gpuarray/c_code/magma_qr.c diff --git a/theano/gpuarray/c_code/magma_svd.c b/aesara/gpuarray/c_code/magma_svd.c similarity index 96% rename from theano/gpuarray/c_code/magma_svd.c rename to aesara/gpuarray/c_code/magma_svd.c index 53bff8c1e4..a96a0cc012 100644 --- a/theano/gpuarray/c_code/magma_svd.c +++ b/aesara/gpuarray/c_code/magma_svd.c @@ -120,7 +120,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, } s_dims[0] = K; - if (theano_prep_output(S, 1, s_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ + if (aesara_prep_output(S, 1, s_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ PyErr_SetString(PyExc_RuntimeError, "GpuMagmaSVD: failed to allocate memory"); goto fail; @@ -130,7 +130,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, if (compute_uv) { u_dims[0] = N; u_dims[1] = N_VT; - if 
(theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ + if (aesara_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ PyErr_SetString(PyExc_RuntimeError, "GpuMagmaSVD: failed to allocate memory"); goto fail; @@ -141,7 +141,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, cudaMemcpyDeviceToDevice); vt_dims[0] = M_U; vt_dims[1] = M; - if (theano_prep_output(VT, 2, vt_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ + if (aesara_prep_output(VT, 2, vt_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){ PyErr_SetString(PyExc_RuntimeError, "GpuMagmaSVD: failed to allocate memory"); goto fail; diff --git a/theano/gpuarray/c_code/pool.c b/aesara/gpuarray/c_code/pool.c similarity index 99% rename from theano/gpuarray/c_code/pool.c rename to aesara/gpuarray/c_code/pool.c index 14a9e74e6c..34e227fef3 100644 --- a/theano/gpuarray/c_code/pool.c +++ b/aesara/gpuarray/c_code/pool.c @@ -271,7 +271,7 @@ int APPLY_SPECIFIC(pool)(PyGpuArrayObject *x, return 1; } - if (theano_prep_output(z, PyGpuArray_NDIM(x), z_dims, + if (aesara_prep_output(z, PyGpuArray_NDIM(x), z_dims, x->ga.typecode, GA_C_ORDER, params->context) != 0) { PyErr_SetString(PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/pool_ave_grad.c b/aesara/gpuarray/c_code/pool_ave_grad.c similarity index 99% rename from theano/gpuarray/c_code/pool_ave_grad.c rename to aesara/gpuarray/c_code/pool_ave_grad.c index 6f3ab6dfee..e666936e6d 100644 --- a/theano/gpuarray/c_code/pool_ave_grad.c +++ b/aesara/gpuarray/c_code/pool_ave_grad.c @@ -134,7 +134,7 @@ int APPLY_SPECIFIC(ave_pool_grad)(PyGpuArrayObject *x, PyErr_SetString(PyExc_ValueError, "GpuMaxPoolGrad: rank error"); return 1; } - if (theano_prep_output(gx, PyGpuArray_NDIM(x), PyGpuArray_DIMS(x), + if (aesara_prep_output(gx, PyGpuArray_NDIM(x), PyGpuArray_DIMS(x), x->ga.typecode, GA_C_ORDER, params->context) != 0) { PyErr_SetString(PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/pool_grad_grad.c b/aesara/gpuarray/c_code/pool_grad_grad.c similarity index 99% rename from theano/gpuarray/c_code/pool_grad_grad.c rename to aesara/gpuarray/c_code/pool_grad_grad.c index a98a974aef..3da5b2a239 100644 --- a/theano/gpuarray/c_code/pool_grad_grad.c +++ b/aesara/gpuarray/c_code/pool_grad_grad.c @@ -128,7 +128,7 @@ int APPLY_SPECIFIC(pool_grad_grad)(PyGpuArrayObject *x, PyErr_SetString(PyExc_ValueError, "GpuPoolingGradGrad: rank error"); return 1; } - if (theano_prep_output(gz, PyGpuArray_NDIM(z), PyGpuArray_DIMS(z), + if (aesara_prep_output(gz, PyGpuArray_NDIM(z), PyGpuArray_DIMS(z), z->ga.typecode, GA_C_ORDER, ctx) != 0) { PyErr_SetString(PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/pool_max_grad.c b/aesara/gpuarray/c_code/pool_max_grad.c similarity index 99% rename from theano/gpuarray/c_code/pool_max_grad.c rename to aesara/gpuarray/c_code/pool_max_grad.c index 0683d37404..efce13323d 100644 --- a/theano/gpuarray/c_code/pool_max_grad.c +++ b/aesara/gpuarray/c_code/pool_max_grad.c @@ -120,7 +120,7 @@ int APPLY_SPECIFIC(max_pool_grad)(PyGpuArrayObject *x, PyErr_SetString(PyExc_ValueError, "GpuMaxPoolGrad: rank error"); return 1; } - if (theano_prep_output(gx, PyGpuArray_NDIM(x), PyGpuArray_DIMS(x), + if (aesara_prep_output(gx, PyGpuArray_NDIM(x), PyGpuArray_DIMS(x), x->ga.typecode, GA_C_ORDER, ctx) != 0) { PyErr_SetString(PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/pool_max_rop.c b/aesara/gpuarray/c_code/pool_max_rop.c similarity index 99% rename from theano/gpuarray/c_code/pool_max_rop.c rename 
to aesara/gpuarray/c_code/pool_max_rop.c index 3f1d7cf3e3..009106b966 100644 --- a/theano/gpuarray/c_code/pool_max_rop.c +++ b/aesara/gpuarray/c_code/pool_max_rop.c @@ -159,7 +159,7 @@ int APPLY_SPECIFIC(max_pool_rop)(PyGpuArrayObject *x, return 1; } - if (theano_prep_output(z, PyGpuArray_NDIM(ex), z_dims, + if (aesara_prep_output(z, PyGpuArray_NDIM(ex), z_dims, ex->ga.typecode, GA_C_ORDER, params->context) != 0) { PyErr_SetString(PyExc_RuntimeError, diff --git a/theano/gpuarray/c_code/topk_common.cuh b/aesara/gpuarray/c_code/topk_common.cuh similarity index 86% rename from theano/gpuarray/c_code/topk_common.cuh rename to aesara/gpuarray/c_code/topk_common.cuh index 72f8de7839..2bf31d069a 100644 --- a/theano/gpuarray/c_code/topk_common.cuh +++ b/aesara/gpuarray/c_code/topk_common.cuh @@ -395,39 +395,39 @@ static __device__ inline ga_half ptr_read_cached(ga_half *ptr, ssize_t offset) { * Following functions are provided to bypass these issues. */ template -static __device__ inline T theano_zero() {return 0;} +static __device__ inline T aesara_zero() {return 0;} template<> -__device__ inline ga_half theano_zero() {return ga_float2half(0);} +__device__ inline ga_half aesara_zero() {return ga_float2half(0);} template -static __device__ inline T theano_one() {return 1;} +static __device__ inline T aesara_one() {return 1;} template<> -__device__ inline ga_half theano_one() {return ga_float2half(1);} - -template static __device__ inline bool theano_eq(const A& a, const B& b) {return a == b;} -template static __device__ inline bool theano_ne(const A& a, const B& b) {return a != b;} -template static __device__ inline bool theano_lt(const A& a, const B& b) {return a < b;} -template static __device__ inline bool theano_gt(const A& a, const B& b) {return a > b;} -template static __device__ inline bool theano_le(const A& a, const B& b) {return a <= b;} -template static __device__ inline bool theano_ge(const A& a, const B& b) {return a >= b;} - -template static __device__ inline bool theano_eq(const ga_half& a, const T& b) {return ga_half2float(a) == b;} -template static __device__ inline bool theano_ne(const ga_half& a, const T& b) {return ga_half2float(a) != b;} -template static __device__ inline bool theano_lt(const ga_half& a, const T& b) {return ga_half2float(a) < b;} -template static __device__ inline bool theano_gt(const ga_half& a, const T& b) {return ga_half2float(a) > b;} -template static __device__ inline bool theano_le(const ga_half& a, const T& b) {return ga_half2float(a) <= b;} -template static __device__ inline bool theano_ge(const ga_half& a, const T& b) {return ga_half2float(a) >= b;} - -template static __device__ inline bool theano_eq(const T& a, const ga_half& b) {return a == ga_half2float(b);} -template static __device__ inline bool theano_ne(const T& a, const ga_half& b) {return a != ga_half2float(b);} -template static __device__ inline bool theano_lt(const T& a, const ga_half& b) {return a < ga_half2float(b);} -template static __device__ inline bool theano_gt(const T& a, const ga_half& b) {return a > ga_half2float(b);} -template static __device__ inline bool theano_le(const T& a, const ga_half& b) {return a <= ga_half2float(b);} -template static __device__ inline bool theano_ge(const T& a, const ga_half& b) {return a >= ga_half2float(b);} - -static __device__ inline bool theano_eq(const ga_half& a, const ga_half& b) {return ga_half2float(a) == ga_half2float(b);} -static __device__ inline bool theano_ne(const ga_half& a, const ga_half& b) {return ga_half2float(a) != 
ga_half2float(b);} -static __device__ inline bool theano_lt(const ga_half& a, const ga_half& b) {return ga_half2float(a) < ga_half2float(b);} -static __device__ inline bool theano_gt(const ga_half& a, const ga_half& b) {return ga_half2float(a) > ga_half2float(b);} -static __device__ inline bool theano_le(const ga_half& a, const ga_half& b) {return ga_half2float(a) <= ga_half2float(b);} -static __device__ inline bool theano_ge(const ga_half& a, const ga_half& b) {return ga_half2float(a) >= ga_half2float(b);} +__device__ inline ga_half aesara_one() {return ga_float2half(1);} + +template static __device__ inline bool aesara_eq(const A& a, const B& b) {return a == b;} +template static __device__ inline bool aesara_ne(const A& a, const B& b) {return a != b;} +template static __device__ inline bool aesara_lt(const A& a, const B& b) {return a < b;} +template static __device__ inline bool aesara_gt(const A& a, const B& b) {return a > b;} +template static __device__ inline bool aesara_le(const A& a, const B& b) {return a <= b;} +template static __device__ inline bool aesara_ge(const A& a, const B& b) {return a >= b;} + +template static __device__ inline bool aesara_eq(const ga_half& a, const T& b) {return ga_half2float(a) == b;} +template static __device__ inline bool aesara_ne(const ga_half& a, const T& b) {return ga_half2float(a) != b;} +template static __device__ inline bool aesara_lt(const ga_half& a, const T& b) {return ga_half2float(a) < b;} +template static __device__ inline bool aesara_gt(const ga_half& a, const T& b) {return ga_half2float(a) > b;} +template static __device__ inline bool aesara_le(const ga_half& a, const T& b) {return ga_half2float(a) <= b;} +template static __device__ inline bool aesara_ge(const ga_half& a, const T& b) {return ga_half2float(a) >= b;} + +template static __device__ inline bool aesara_eq(const T& a, const ga_half& b) {return a == ga_half2float(b);} +template static __device__ inline bool aesara_ne(const T& a, const ga_half& b) {return a != ga_half2float(b);} +template static __device__ inline bool aesara_lt(const T& a, const ga_half& b) {return a < ga_half2float(b);} +template static __device__ inline bool aesara_gt(const T& a, const ga_half& b) {return a > ga_half2float(b);} +template static __device__ inline bool aesara_le(const T& a, const ga_half& b) {return a <= ga_half2float(b);} +template static __device__ inline bool aesara_ge(const T& a, const ga_half& b) {return a >= ga_half2float(b);} + +static __device__ inline bool aesara_eq(const ga_half& a, const ga_half& b) {return ga_half2float(a) == ga_half2float(b);} +static __device__ inline bool aesara_ne(const ga_half& a, const ga_half& b) {return ga_half2float(a) != ga_half2float(b);} +static __device__ inline bool aesara_lt(const ga_half& a, const ga_half& b) {return ga_half2float(a) < ga_half2float(b);} +static __device__ inline bool aesara_gt(const ga_half& a, const ga_half& b) {return ga_half2float(a) > ga_half2float(b);} +static __device__ inline bool aesara_le(const ga_half& a, const ga_half& b) {return ga_half2float(a) <= ga_half2float(b);} +static __device__ inline bool aesara_ge(const ga_half& a, const ga_half& b) {return ga_half2float(a) >= ga_half2float(b);} diff --git a/theano/gpuarray/c_code/topk_dense.cu b/aesara/gpuarray/c_code/topk_dense.cu similarity index 99% rename from theano/gpuarray/c_code/topk_dense.cu rename to aesara/gpuarray/c_code/topk_dense.cu index efcb560937..9cf03f5928 100644 --- a/theano/gpuarray/c_code/topk_dense.cu +++ b/aesara/gpuarray/c_code/topk_dense.cu @@ -47,7 
+47,7 @@ extern "C" __global__ void k_topk_dense( //} // get input and its radix friendly form - const INPUT_TYPE xval = is_topk ? ptr_at(src, idx*src_strides_0) : theano_zero(); + const INPUT_TYPE xval = is_topk ? ptr_at(src, idx*src_strides_0) : aesara_zero(); radix_t x = RadixConfig::convert(xval); // resolve negative k diff --git a/theano/gpuarray/c_code/topk_dense_large.cu b/aesara/gpuarray/c_code/topk_dense_large.cu similarity index 96% rename from theano/gpuarray/c_code/topk_dense_large.cu rename to aesara/gpuarray/c_code/topk_dense_large.cu index bc6a35f45a..494303517c 100644 --- a/theano/gpuarray/c_code/topk_dense_large.cu +++ b/aesara/gpuarray/c_code/topk_dense_large.cu @@ -15,19 +15,19 @@ __device__ DataType find_pattern(DataType* smem, RadixType known_bits, RadixType known_bits_mask) { if (threadIdx.x < 32) - smem[threadIdx.x] = theano_zero(); + smem[threadIdx.x] = aesara_zero(); local_barrier(); // All threads participate in the loop, in order to sync on the flag for (CountType i = threadIdx.x; i < (slice_size + (CountType)blockDim.x-1); i += blockDim.x) { bool in_range = (i < slice_size); - DataType v = in_range ? ptr_read_cached(data, i*stride) : theano_zero(); + DataType v = in_range ? ptr_read_cached(data, i*stride) : aesara_zero(); if (in_range && ((RadixConfig::convert(v) & known_bits_mask) == known_bits)) { // There should not be conflicts if we are using find_pattern, // since the result is unique - smem[0] = theano_one(); + smem[0] = aesara_one(); smem[1] = v; // can't use val as the flag, since it could be 0 } @@ -39,10 +39,10 @@ __device__ DataType find_pattern(DataType* smem, local_barrier(); // Check to see if a thread found the value - if (theano_ne(found, 0)) + if (aesara_ne(found, 0)) return val; } - return theano_zero(); + return aesara_zero(); } // This function counts the distribution of all input values in a @@ -260,12 +260,12 @@ extern "C" __global__ void KERNEL_NAME( for (int i = idx; i < iter_bound; i += blockDim.x) { bool in_range = (i < size); - INPUT_TYPE v = in_range ? ptr_read_cached(src, i*src_strides_0) : theano_zero(); + INPUT_TYPE v = in_range ? ptr_read_cached(src, i*src_strides_0) : aesara_zero(); bool has_topk; if (order) { - has_topk = in_range && (theano_gt(v, topkth_value)); + has_topk = in_range && (aesara_gt(v, topkth_value)); } else { - has_topk = in_range && (theano_lt(v, topkth_value)); + has_topk = in_range && (aesara_lt(v, topkth_value)); } int index = binary_cumsum_exclusive(idx, warp_id, smem, has_topk); @@ -288,8 +288,8 @@ extern "C" __global__ void KERNEL_NAME( for (COUNT_TYPE i = idx; i < iter_bound; i += blockDim.x) { bool in_range = (i < size); - INPUT_TYPE v = in_range ? ptr_read_cached(src, i*src_strides_0) : theano_zero(); - bool has_topk = in_range && (theano_eq(v, topkth_value)); + INPUT_TYPE v = in_range ? 
ptr_read_cached(src, i*src_strides_0) : aesara_zero(); + bool has_topk = in_range && (aesara_eq(v, topkth_value)); int index = binary_cumsum_exclusive(idx, warp_id, smem, has_topk); int carry = smem[blockDim.x / 32 - 1]; @@ -311,4 +311,3 @@ extern "C" __global__ void KERNEL_NAME( write_base += carry; } } - diff --git a/theano/gpuarray/ctc.py b/aesara/gpuarray/ctc.py similarity index 91% rename from theano/gpuarray/ctc.py rename to aesara/gpuarray/ctc.py index 30c93f293b..95aff14682 100644 --- a/theano/gpuarray/ctc.py +++ b/aesara/gpuarray/ctc.py @@ -1,24 +1,24 @@ import os import sys -from theano.configdefaults import config -from theano.gpuarray import pygpu -from theano.gpuarray.basic_ops import ( +from aesara.configdefaults import config +from aesara.gpuarray import pygpu +from aesara.gpuarray.basic_ops import ( as_gpuarray_variable, gpu_contiguous, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.elemwise import GpuDimShuffle -from theano.gpuarray.type import GpuArrayType, gpu_context_type -from theano.gradient import grad_undefined -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonExternalCOp -from theano.graph.opt import local_optimizer -from theano.tensor.basic import as_tensor_variable -from theano.tensor.basic_opt import register_canonicalize -from theano.tensor.blas import batched_dot -from theano.tensor.nnet.ctc import ctc_available +from aesara.gpuarray.elemwise import GpuDimShuffle +from aesara.gpuarray.type import GpuArrayType, gpu_context_type +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonExternalCOp +from aesara.graph.opt import local_optimizer +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.basic_opt import register_canonicalize +from aesara.tensor.blas import batched_dot +from aesara.tensor.nnet.ctc import ctc_available class GpuConnectionistTemporalClassification(_NoPythonExternalCOp): diff --git a/theano/gpuarray/cudnn_defs.py b/aesara/gpuarray/cudnn_defs.py similarity index 96% rename from theano/gpuarray/cudnn_defs.py rename to aesara/gpuarray/cudnn_defs.py index c0d8db74bc..eb49d4b9e2 100644 --- a/theano/gpuarray/cudnn_defs.py +++ b/aesara/gpuarray/cudnn_defs.py @@ -1,8 +1,8 @@ """ -Declarations of cuDNN types and constants used in Theano gpuarray DNN module. +Declarations of cuDNN types and constants used in Aesara gpuarray DNN module. -For every cuDNN API supported by Theano, this module defines a class that -provides the set of cuDNN definitions to be used in Theano Ops. +For every cuDNN API supported by Aesara, this module defines a class that +provides the set of cuDNN definitions to be used in Aesara Ops. Use :func:`get_definitions` to get the right cuDNN definitions for a given cuDNN version. @@ -16,7 +16,7 @@ """ -from theano.graph.type import CEnumType +from aesara.graph.type import CEnumType HALF, FLOAT, DOUBLE = ("float16", "float32", "float64") @@ -149,7 +149,7 @@ class CuDNNV51: def get_supported_dtype_configs(self, check_runtime=None): """ Return the tuple of data type configurations supported by this version of cuDNN. - This is currently convenient for all supported cuDNN versions, as Theano does not + This is currently convenient for all supported cuDNN versions, as Aesara does not yet support new data types (like INT8, INT8x4, etc.). 
``check_runtime`` may be a function that tests if a data type configuration is supported.:: @@ -208,7 +208,7 @@ def fwd_algo_supports_dtype_config(self, algo, dtype, precision, ndim): return False def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim): - # NB: Theano does not support float16 precision anymore for backward cuDNN convolutions. + # NB: Aesara does not support float16 precision anymore for backward cuDNN convolutions. if is_true_half_config(dtype, precision): return False algorithms = self.cudnnConvolutionBwdFilterAlgo_t @@ -231,7 +231,7 @@ def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim): return False def bwd_data_algo_supports_dtype_config(self, algo, dtype, precision, ndim): - # NB: Theano does not support float16 precision anymore for backward cuDNN convolutions. + # NB: Aesara does not support float16 precision anymore for backward cuDNN convolutions. if is_true_half_config(dtype, precision): return False algorithms = self.cudnnConvolutionBwdDataAlgo_t @@ -390,10 +390,10 @@ class CuDNNV7(CuDNNV6): def get_definitions(cudnn_version=None): """ - Return cuDNN definitions to be used by Theano for the given cuDNN version. + Return cuDNN definitions to be used by Aesara for the given cuDNN version. ``cudnn_version`` must be None or an integer - (typically the version returned by :func:`theano.gpuarray.dnn.version`). + (typically the version returned by :func:`aesara.gpuarray.dnn.version`). if None, return definitions for the most recent supported cuDNN version. """ diff --git a/theano/gpuarray/dnn.py b/aesara/gpuarray/dnn.py similarity index 98% rename from theano/gpuarray/dnn.py rename to aesara/gpuarray/dnn.py index 24249af6d6..4705bbf30f 100644 --- a/theano/gpuarray/dnn.py +++ b/aesara/gpuarray/dnn.py @@ -6,16 +6,16 @@ import numpy as np -import theano -import theano.gpuarray.pathparse -import theano.tensor.basic as tt -import theano.tensor.math as tm -from theano.assert_op import Assert -from theano.compile.io import Out -from theano.compile.mode import Mode -from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME, config -from theano.gpuarray import cudnn_defs, pygpu -from theano.gpuarray.basic_ops import ( +import aesara +import aesara.gpuarray.pathparse +import aesara.tensor.basic as tt +import aesara.tensor.math as tm +from aesara.assert_op import Assert +from aesara.compile.io import Out +from aesara.compile.mode import Mode +from aesara.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME, config +from aesara.gpuarray import cudnn_defs, pygpu +from aesara.gpuarray.basic_ops import ( GpuAllocEmpty, GpuArrayType, HostFromGpu, @@ -25,22 +25,22 @@ gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.type import GpuArraySharedVariable, get_context, gpu_context_type -from theano.gradient import DisconnectedType, grad_not_implemented -from theano.graph.basic import Apply, Variable -from theano.graph.op import ExternalCOp, _NoPythonCOp, _NoPythonExternalCOp -from theano.graph.params_type import ParamsType -from theano.graph.type import CDataType, EnumList, Generic -from theano.link.c.cmodule import GCC_compiler -from theano.scalar import as_scalar -from theano.scalar import bool as bool_t -from theano.scalar import constant, get_scalar_type -from theano.scalar import int32 as int_t -from theano.scalar import uint32 as uint32_t -from theano.tensor.basic import as_tensor_variable -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.extra_ops import cpu_contiguous -from 
theano.tensor.nnet.abstract_conv import ( +from aesara.gpuarray.type import GpuArraySharedVariable, get_context, gpu_context_type +from aesara.gradient import DisconnectedType, grad_not_implemented +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import ExternalCOp, _NoPythonCOp, _NoPythonExternalCOp +from aesara.graph.params_type import ParamsType +from aesara.graph.type import CDataType, EnumList, Generic +from aesara.link.c.cmodule import GCC_compiler +from aesara.scalar import as_scalar +from aesara.scalar import bool as bool_t +from aesara.scalar import constant, get_scalar_type +from aesara.scalar import int32 as int_t +from aesara.scalar import uint32 as uint32_t +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.extra_ops import cpu_contiguous +from aesara.tensor.nnet.abstract_conv import ( AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights, @@ -50,8 +50,8 @@ assert_conv_shape, get_conv_output_shape, ) -from theano.tensor.shape import reshape, shape_i, shape_i_op, shape_padright -from theano.tensor.type import int_dtypes, integer_dtypes, values_eq_approx_always_true +from aesara.tensor.shape import reshape, shape_i, shape_i_op, shape_padright +from aesara.tensor.type import int_dtypes, integer_dtypes, values_eq_approx_always_true DNN_CONV_ALGO_CHOOSE_ONCE = ["guess_once", "time_once"] @@ -66,7 +66,7 @@ WIN32_CUDNN_NAMES = ["cudnn64_7.dll", "cudnn64_6.dll", "cudnn64_5.dll"] if sys.platform == "win32": - theano.gpuarray.pathparse.PathParser(config.dnn__bin_path) + aesara.gpuarray.pathparse.PathParser(config.dnn__bin_path) def _load_lib(name): @@ -104,7 +104,7 @@ def _dnn_lib(): if lib_name is None: raise RuntimeError( "Could not find cudnn library (looked for v5* to v7*)." - " Check your cudnn installation. Maybe using the Theano" + " Check your cudnn installation. Maybe using the Aesara" f' flag dnn__base_path can help you. Current value "{config.dnn__base_path}"' ) else: @@ -112,7 +112,7 @@ def _dnn_lib(): if dnn_handle is None: raise RuntimeError( "Could not load cudnn library. Check your cudnn" - " installation. Maybe using the Theano" + " installation. Maybe using the Aesara" f' flag dnn__base_path can help you. Current value "{config.dnn__base_path}"' ) _dnn_lib.handle = dnn_handle @@ -196,8 +196,8 @@ def _dnn_check_version(): if v >= 7200: warnings.warn( "Your cuDNN version is more recent than " - "Theano. If you encounter problems, try " - "updating Theano or downgrading cuDNN to " + "Aesara. If you encounter problems, try " + "updating Aesara or downgrading cuDNN to " "a version >= v5 and <= v7." ) return True, None @@ -281,12 +281,12 @@ def _get_func(self): The integer value is assumed to be a valid pointer for the type and no check is done to ensure that. 
""" - from theano.scalar import get_scalar_type + from aesara.scalar import get_scalar_type if self._fn is None: with config.change_flags(compute_test_value="off"): v = get_scalar_type("int64")() - self._fn = theano.function( + self._fn = aesara.function( [v], CDataMaker(self)(v), mode=Mode(optimizer=None), @@ -320,7 +320,7 @@ def do_constant_folding(self, fgraph, node): return False def make_node(self, val): - from theano.scalar import as_scalar + from aesara.scalar import as_scalar val = as_scalar(val).astype("uint64") return Apply(self, [val], [self.rtype()]) @@ -425,7 +425,7 @@ def version(raises=True): return -1 if version.v is None: - f = theano.function([], DnnVersion()(), Mode(optimizer=None), profile=False) + f = aesara.function([], DnnVersion()(), Mode(optimizer=None), profile=False) v = f() if v[0] != v[1]: raise RuntimeError( @@ -445,7 +445,7 @@ def version(raises=True): def get_precision(precision, inputs, for_grad=False): - common_dtype = theano.scalar.upcast(*[i.dtype for i in inputs]) + common_dtype = aesara.scalar.upcast(*[i.dtype for i in inputs]) if not common_dtype.startswith("float"): raise TypeError("cuDNN convolution only works on real numbers") @@ -715,7 +715,7 @@ def ensure_dt(val, default, name, dtype): val = constant(val) if hasattr(val, "ndim") and val.ndim == 0: val = as_scalar(val) - if not isinstance(val.type, theano.scalar.Scalar): + if not isinstance(val.type, aesara.scalar.Scalar): raise TypeError(f"{name}: expected a scalar value") if not val.type.dtype == dtype: val = val.astype(dtype) @@ -1004,14 +1004,14 @@ def make_node(self, img, topgrad, output, desc, alpha=None, beta=None): "cuDNN backward filter operation for 3D convolutions may produce bad results " "with certain cuDNN algorithms depending on the compute capability of your GPU " "if subsample is not (1, 1, 1). If you encounter problems, consider " - 'setting the theano flag "dnn__conv__algo_bwd_filter" to "none".' + 'setting the aesara flag "dnn__conv__algo_bwd_filter" to "none".' ) if self.op_may_fail_with_beta(img, beta): warnings.warn( "cuDNN backward filter operation for convolutions may produce bad results " "with certain cuDNN algorithms depending on the compute capability of your GPU " "if beta != 1. If you encounter problems, consider " - 'setting the theano flag "dnn__conv__algo_bwd_filter" to ' + 'setting the aesara flag "dnn__conv__algo_bwd_filter" to ' '"none", "deterministic", "fft", or "small".' 
) ctx_name = infer_context_name(img, topgrad, output) @@ -1976,9 +1976,9 @@ def L_op(self, inp, outputs, grads): return ( g_out, - theano.gradient.DisconnectedType()(), - theano.gradient.DisconnectedType()(), - theano.gradient.DisconnectedType()(), + aesara.gradient.DisconnectedType()(), + aesara.gradient.DisconnectedType()(), + aesara.gradient.DisconnectedType()(), ) def connection_pattern(self, node): @@ -2631,7 +2631,7 @@ def c_code_cache_version_apply(self, node): def _make_dropout_desc(dropout, seed, context_name): - desc, states = theano.function( + desc, states = aesara.function( [], _DropoutDescriptor(context_name)(dropout, seed, context_name), Mode(optimizer=None), @@ -2737,7 +2737,7 @@ def _make_rnn_desc( dtype, context_name, ): - desc = theano.function( + desc = aesara.function( [], _RNNDescriptor(context_name)( hidden_size, num_layers, ddesc, input_mode, direction_mode, rnn_mode, dtype @@ -2773,7 +2773,7 @@ def make_node(self, desc, input_size, typecode): def _get_param_size(desc, input_size, dtype, context_name): typecode = gpuarray.dtype_to_typecode(dtype) - return theano.function( + return aesara.function( [], _RNNParamSize(context_name)(desc, input_size, typecode), Mode(optimizer=None), @@ -3019,7 +3019,7 @@ def _split_rnn_params(w, desc, layer, input_size, dtype, rnn_mode): typecode = gpuarray.dtype_to_typecode(dtype) outs = _RNNSplitParams(rnn_mode)(w, desc, layer, input_size, typecode) outs = [Out(o, borrow=True) for o in outs] - return theano.function([], outs, Mode(optimizer=None), profile=False)() + return aesara.function([], outs, Mode(optimizer=None), profile=False)() class GpuDnnRNNOp(DnnBase): @@ -3084,7 +3084,7 @@ def L_op(self, inputs, outputs, output_grads): # Since the op return two outputs which contain essentially # the same information, the user will most likely only use one # of them. This leads to the situation that the other is - # considered "disconnected" by theano in the gradient. + # considered "disconnected" by aesara in the gradient. # However we know that this isn't really the case so we fix it # here. @@ -3181,9 +3181,9 @@ def make_node(self, desc, x, hx, y, reserve, w): class RNNBlock: """ An object that allow us to use CuDNN RNN implementation. - TODO: make an example how to use. You can check Theano tests + TODO: make an example how to use. You can check Aesara tests test_dnn_rnn_gru() and test_dnn_rnn_lstm() in the file - theano/gpuarray/tests/test_dnn.py for now. + aesara/gpuarray/tests/test_dnn.py for now. Parameters @@ -3370,7 +3370,7 @@ def dnn_batch_normalization_train( Notes ----- - Requires cuDNN 5 and Theano 0.9dev2 or more recent. + Requires cuDNN 5 and Aesara 0.9dev2 or more recent. For 4d tensors, returned values are equivalent to: @@ -3504,7 +3504,7 @@ def dnn_batch_normalization_test( Notes ----- - Requires cuDNN 5 and Theano 0.9dev2 or more recent. + Requires cuDNN 5 and Aesara 0.9dev2 or more recent. 
For 4d tensors, the returned value is equivalent to: diff --git a/theano/gpuarray/dnn_opt.py b/aesara/gpuarray/dnn_opt.py similarity index 95% rename from theano/gpuarray/dnn_opt.py rename to aesara/gpuarray/dnn_opt.py index af79d087a3..befd94c09d 100644 --- a/theano/gpuarray/dnn_opt.py +++ b/aesara/gpuarray/dnn_opt.py @@ -1,13 +1,13 @@ -import theano -from theano.compile import optdb -from theano.gpuarray.basic_ops import ( +import aesara +from aesara.compile import optdb +from aesara.gpuarray.basic_ops import ( GpuAllocEmpty, GpuArrayType, as_gpuarray_variable, gpu_contiguous, infer_context_name, ) -from theano.gpuarray.dnn import ( +from aesara.gpuarray.dnn import ( GpuDnnBatchNorm, GpuDnnBatchNormInference, GpuDnnConv, @@ -28,9 +28,9 @@ local_abstractconv_cudnn_graph, version, ) -from theano.gpuarray.elemwise import GpuCAReduceCuda, GpuElemwise -from theano.gpuarray.nnet import GpuSoftmax -from theano.gpuarray.opt_util import ( +from aesara.gpuarray.elemwise import GpuCAReduceCuda, GpuElemwise +from aesara.gpuarray.nnet import GpuSoftmax +from aesara.gpuarray.opt_util import ( alpha_merge, inplace_allocempty, op_lifter, @@ -38,7 +38,7 @@ pad_dims, unpad_dims, ) -from theano.gpuarray.optdb import ( +from aesara.gpuarray.optdb import ( gpu_seqopt, pool_db, pool_db2, @@ -46,12 +46,12 @@ register_opt, register_opt2, ) -from theano.gpuarray.reduction import GpuMaxAndArgmax -from theano.gpuarray.type import list_contexts -from theano.graph.opt import GlobalOptimizer, inherit_stack_trace, local_optimizer -from theano.scalar import Log -from theano.tensor.math import Argmax -from theano.tensor.nnet.abstract_conv import ( +from aesara.gpuarray.reduction import GpuMaxAndArgmax +from aesara.gpuarray.type import list_contexts +from aesara.graph.opt import GlobalOptimizer, inherit_stack_trace, local_optimizer +from aesara.scalar import Log +from aesara.tensor.math import Argmax +from aesara.tensor.nnet.abstract_conv import ( AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights, @@ -61,9 +61,9 @@ assert_conv_shape, get_conv_output_shape, ) -from theano.tensor.nnet.basic import LogSoftmax, SoftmaxGrad -from theano.tensor.shape import shape_i_op -from theano.tensor.signal.pool import AveragePoolGrad, MaxPoolGrad, Pool +from aesara.tensor.nnet.basic import LogSoftmax, SoftmaxGrad +from aesara.tensor.shape import shape_i_op +from aesara.tensor.signal.pool import AveragePoolGrad, MaxPoolGrad, Pool @local_optimizer([AbstractConv2d, AbstractConv3d]) @@ -426,7 +426,7 @@ def local_dnn_convgi_inplace(node, inputs): optdb.register( "local_dnna_conv_inplace", - theano.graph.opt.in2out( + aesara.graph.opt.in2out( local_dnn_conv_inplace, local_dnn_convgw_inplace, local_dnn_convgi_inplace, @@ -724,23 +724,23 @@ def _identity(a): return a def _square(a): - return GpuElemwise(theano.scalar.basic.sqr)(a) + return GpuElemwise(aesara.scalar.basic.sqr)(a) scal = node.op.scalar_op.name post = _identity if node.op.pre_scalar_op is not None: - if isinstance(node.op.scalar_op, theano.scalar.basic.Add): - if isinstance(node.op.pre_scalar_op, theano.scalar.basic.Sqr): + if isinstance(node.op.scalar_op, aesara.scalar.basic.Add): + if isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Sqr): scal = "norm2" post = _square - elif isinstance(node.op.pre_scalar_op, theano.scalar.basic.Abs): + elif isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Abs): scal = "norm1" else: return elif isinstance( - node.op.scalar_op, theano.scalar.basic.ScalarMaximum - ) and isinstance(node.op.pre_scalar_op, 
theano.scalar.basic.Abs): + node.op.scalar_op, aesara.scalar.basic.ScalarMaximum + ) and isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Abs): scal = "absmax" else: return @@ -826,10 +826,10 @@ def apply(self, fgraph): """ for c in list_contexts(): if not dnn_available(c): - # Make an assert error as we want Theano to fail, not + # Make an assert error as we want Aesara to fail, not # just skip this optimization. raise AssertionError( - "cuDNN optimization was enabled, but Theano was not able " + "cuDNN optimization was enabled, but Aesara was not able " "to use it for context " + str(c) + ". We got this error: \n" diff --git a/theano/gpuarray/elemwise.py b/aesara/gpuarray/elemwise.py similarity index 99% rename from theano/gpuarray/elemwise.py rename to aesara/gpuarray/elemwise.py index d56166d1ed..6e06a45e12 100644 --- a/theano/gpuarray/elemwise.py +++ b/aesara/gpuarray/elemwise.py @@ -3,15 +3,15 @@ import numpy as np -from theano import scalar as ts -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp -from theano.graph.utils import MethodNotDefined -from theano.link.c.interface import HideC -from theano.scalar import Composite, Scalar -from theano.scalar.basic import complex_types, upgrade_to_float_no_complex -from theano.scalar.basic_scipy import Erfcinv, Erfinv -from theano.tensor.elemwise import CAReduceDtype, DimShuffle, Elemwise +from aesara import scalar as ts +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp +from aesara.graph.utils import MethodNotDefined +from aesara.link.c.interface import HideC +from aesara.scalar import Composite, Scalar +from aesara.scalar.basic import complex_types, upgrade_to_float_no_complex +from aesara.scalar.basic_scipy import Erfcinv, Erfinv +from aesara.tensor.elemwise import CAReduceDtype, DimShuffle, Elemwise try: @@ -495,7 +495,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype, _NoPythonOp): Examples -------- - When scalar_op is a theano.scalar.basic.Add instance: + When scalar_op is an `aesara.scalar.basic.Add` instance: - reduce_mask == (1,) sums a vector to a scalar @@ -668,7 +668,7 @@ def c_code(self, node, name, inp, out, sub): nd_in = node.inputs[0].type.ndim nd_out = node.outputs[0].type.ndim - # For complex, we need to use theano_complex* in the c code to + # For complex, we need to use aesara_complex* in the c code to # have it run. But libgpuarray don't understand it. 
in_dtype = node.inputs[0].type.dtype_specs()[1] out_dtype = node.outputs[0].type.dtype_specs()[1] @@ -1046,7 +1046,7 @@ def _k_init(self, node, nodename): in_dtype = node.inputs[0].dtype out_dtype = node.outputs[0].dtype acc_dtype = self._acc_dtype(node.inputs[0].dtype) - # We need to use theano_complex* and not npy_complex* + # We need to use aesara_complex* and not npy_complex* in_type = gpuarray.dtype_to_ctype(in_dtype) out_type = gpuarray.dtype_to_ctype(out_dtype) acc_type = gpuarray.dtype_to_ctype(acc_dtype) diff --git a/theano/gpuarray/extra_ops.py b/aesara/gpuarray/extra_ops.py similarity index 98% rename from theano/gpuarray/extra_ops.py rename to aesara/gpuarray/extra_ops.py index 3076ef716f..f2d6cce765 100644 --- a/theano/gpuarray/extra_ops.py +++ b/aesara/gpuarray/extra_ops.py @@ -1,6 +1,6 @@ -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp -from theano.tensor.extra_ops import CumOp +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp +from aesara.tensor.extra_ops import CumOp try: @@ -8,8 +8,8 @@ except ImportError: pass -import theano.scalar as scalar -from theano.gpuarray.basic_ops import ( +import aesara.scalar as scalar +from aesara.gpuarray.basic_ops import ( GpuKernelBaseCOp, GpuReshape, Kernel, @@ -17,9 +17,9 @@ gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.opt import op_lifter, register_opt, register_opt2 -from theano.gpuarray.type import gpu_context_type -from theano.graph.params_type import ParamsType +from aesara.gpuarray.opt import op_lifter, register_opt, register_opt2 +from aesara.gpuarray.type import gpu_context_type +from aesara.graph.params_type import ParamsType class GpuCumOp(GpuKernelBaseCOp, _NoPythonOp): @@ -315,7 +315,7 @@ def c_code(self, node, nodename, inp, out, sub): axis += PyGpuArray_NDIM(%(x)s); } - if (theano_prep_output(&%(z)s, PyGpuArray_NDIM(%(x)s), PyGpuArray_DIMS(%(x)s), + if (aesara_prep_output(&%(z)s, PyGpuArray_NDIM(%(x)s), PyGpuArray_DIMS(%(x)s), %(x)s->ga.typecode, GA_C_ORDER, %(params)s->context) != 0) { %(fail)s; } diff --git a/theano/gpuarray/fft.py b/aesara/gpuarray/fft.py similarity index 96% rename from theano/gpuarray/fft.py rename to aesara/gpuarray/fft.py index 6cb3572a0e..0393902282 100644 --- a/theano/gpuarray/fft.py +++ b/aesara/gpuarray/fft.py @@ -1,20 +1,20 @@ import numpy as np -from theano.gpuarray.basic_ops import ( +from aesara.gpuarray.basic_ops import ( as_gpuarray_variable, gpu_contiguous, infer_context_name, ) -from theano.gpuarray.opt import op_lifter, register_opt, register_opt2 -from theano.gpuarray.type import GpuArrayType -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp -from theano.tensor.basic import as_tensor_variable -from theano.tensor.fft import IRFFTOp -from theano.tensor.math import sqrt -from theano.tensor.subtensor import set_subtensor -from theano.tensor.type import integer_dtypes +from aesara.gpuarray.opt import op_lifter, register_opt, register_opt2 +from aesara.gpuarray.type import GpuArrayType +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.fft import IRFFTOp +from aesara.tensor.math import sqrt +from aesara.tensor.subtensor import set_subtensor +from aesara.tensor.type import integer_dtypes try: diff --git a/theano/gpuarray/fp16_help.py b/aesara/gpuarray/fp16_help.py similarity index 100% rename 
from theano/gpuarray/fp16_help.py rename to aesara/gpuarray/fp16_help.py diff --git a/theano/gpuarray/kernel_codegen.py b/aesara/gpuarray/kernel_codegen.py similarity index 100% rename from theano/gpuarray/kernel_codegen.py rename to aesara/gpuarray/kernel_codegen.py diff --git a/theano/gpuarray/linalg.py b/aesara/gpuarray/linalg.py similarity index 97% rename from theano/gpuarray/linalg.py rename to aesara/gpuarray/linalg.py index 35d745a541..3935cec69b 100644 --- a/theano/gpuarray/linalg.py +++ b/aesara/gpuarray/linalg.py @@ -4,21 +4,21 @@ import pkg_resources from numpy.linalg.linalg import LinAlgError -from theano.configdefaults import config -from theano.gpuarray.basic_ops import ( +from aesara.configdefaults import config +from aesara.gpuarray.basic_ops import ( CGpuKernelBase, as_gpuarray_variable, gpu_contiguous, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.type import GpuArrayType, gpu_context_type -from theano.graph.basic import Apply -from theano.graph.op import ExternalCOp, Op -from theano.graph.params_type import ParamsType -from theano.scalar import bool as bool_t -from theano.tensor import basic as tt -from theano.tensor import math as tm +from aesara.gpuarray.type import GpuArrayType, gpu_context_type +from aesara.graph.basic import Apply +from aesara.graph.op import ExternalCOp, Op +from aesara.graph.params_type import ParamsType +from aesara.scalar import bool as bool_t +from aesara.tensor import basic as tt +from aesara.tensor import math as tm try: @@ -332,7 +332,7 @@ def perform(self, node, inputs, outputs): z[0] = b def L_op(self, inputs, outputs, output_gradients): - # Modified from theano/tensor/slinalg.py + # Modified from aesara/tensor/slinalg.py A, b = inputs c = outputs[0] c_bar = output_gradients[0] @@ -487,7 +487,7 @@ def perform(self, node, inputs, outputs): x[0] = b def L_op(self, inputs, outputs, output_gradients): - # Modified from theano/tensor/slinalg.py + # Modified from aesara/tensor/slinalg.py A, b = inputs c = outputs[0] c_bar = output_gradients[0] @@ -599,7 +599,7 @@ def perform(self, node, inputs, outputs): # The output matrix will contain only the upper or lower # triangular factorization of A. If L is C ordered (it - # probably is as it is the default in Theano) we just switch + # probably is as it is the default in Aesara) we just switch # the fill mode parameter of cusolver l_parameter = 0 if self.lower else 1 if L.flags["C_CONTIGUOUS"]: @@ -654,7 +654,7 @@ def perform(self, node, inputs, outputs): outputs[0][0] = L def L_op(self, inputs, outputs, gradients): - # Modified from theano/tensor/slinalg.py + # Modified from aesara/tensor/slinalg.py # No handling for on_error = 'nan' dz = gradients[0] chol_x = outputs[0] @@ -745,7 +745,7 @@ class GpuMagmaSVD(GpuMagmaBase): .. warning:: Because of implementation constraints, this Op returns outputs - in order ``S, U, VT``. Use :func:`theano.gpuarray.linalg.gpu_svd` + in order ``S, U, VT``. Use :func:`aesara.gpuarray.linalg.gpu_svd` to get them in expected order ``U, S, VT``. """ @@ -940,7 +940,7 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase): .. warning:: Because of implementation constraints, this Op returns outputs - in order ``R, Q``. Use :func:`theano.gpuarray.linalg.gpu_qr` + in order ``R, Q``. Use :func:`aesara.gpuarray.linalg.gpu_qr` to get them in expected order ``Q, R``. 
""" diff --git a/theano/gpuarray/multinomial.py b/aesara/gpuarray/multinomial.py similarity index 95% rename from theano/gpuarray/multinomial.py rename to aesara/gpuarray/multinomial.py index fdccf41f33..408267b668 100644 --- a/theano/gpuarray/multinomial.py +++ b/aesara/gpuarray/multinomial.py @@ -8,24 +8,24 @@ except ImportError: pass -import theano -import theano.sandbox.multinomial -from theano.gpuarray.basic_ops import ( +import aesara +import aesara.sandbox.multinomial +from aesara.gpuarray.basic_ops import ( GpuKernelBaseCOp, Kernel, as_gpuarray_variable, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.elemwise import GpuDimShuffle -from theano.gpuarray.fp16_help import load_w, work_dtype, write_w -from theano.gpuarray.opt import op_lifter, register_opt, register_opt2 -from theano.gpuarray.type import GpuArrayType -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp -from theano.scalar import as_scalar -from theano.tensor.basic import get_scalar_constant_value -from theano.tensor.exceptions import NotScalarConstantError +from aesara.gpuarray.elemwise import GpuDimShuffle +from aesara.gpuarray.fp16_help import load_w, work_dtype, write_w +from aesara.gpuarray.opt import op_lifter, register_opt, register_opt2 +from aesara.gpuarray.type import GpuArrayType +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp +from aesara.scalar import as_scalar +from aesara.tensor.basic import get_scalar_constant_value +from aesara.tensor.exceptions import NotScalarConstantError class GPUAMultinomialFromUniform(GpuKernelBaseCOp, _NoPythonOp): @@ -184,7 +184,7 @@ def c_code(self, node, name, inp, outputs, sub): dims[0] = PyGpuArray_DIMS(pvals)[1]; dims[1] = PyGpuArray_DIMS(pvals)[0]; - if (theano_prep_output(&out, 2, dims, %(out_typecode)s, + if (aesara_prep_output(&out, 2, dims, %(out_typecode)s, GA_C_ORDER, %(ctx)s) != 0){ %(fail)s } @@ -434,7 +434,7 @@ def c_code(self, node, name, inp, outputs, sub): dims[0] = n_samples; dims[1] = PyGpuArray_DIMS(pvals)[0]; - if (theano_prep_output(&out, 2, dims, GA_LONG, + if (aesara_prep_output(&out, 2, dims, GA_LONG, GA_C_ORDER, %(ctx)s) != 0){ Py_DECREF(pvals_copy); %(fail)s @@ -494,8 +494,8 @@ def c_code_cache_version(self): @register_opt("fast_compile") -@op_lifter([theano.sandbox.multinomial.MultinomialFromUniform]) -@register_opt2([theano.sandbox.multinomial.MultinomialFromUniform], "fast_compile") +@op_lifter([aesara.sandbox.multinomial.MultinomialFromUniform]) +@register_opt2([aesara.sandbox.multinomial.MultinomialFromUniform], "fast_compile") def local_gpua_multinomial(op, context_name, inputs, outputs): # TODO : need description for function @@ -515,8 +515,8 @@ def local_gpua_multinomial(op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.sandbox.multinomial.ChoiceFromUniform]) -@register_opt2([theano.sandbox.multinomial.ChoiceFromUniform], "fast_compile") +@op_lifter([aesara.sandbox.multinomial.ChoiceFromUniform]) +@register_opt2([aesara.sandbox.multinomial.ChoiceFromUniform], "fast_compile") def local_gpua_multinomial_wor(op, context_name, inputs, outputs): # TODO : need description for function p, u, n = inputs diff --git a/theano/gpuarray/neighbours.py b/aesara/gpuarray/neighbours.py similarity index 98% rename from theano/gpuarray/neighbours.py rename to aesara/gpuarray/neighbours.py index 3e6e0a40b9..0a5376254e 100644 --- a/theano/gpuarray/neighbours.py +++ b/aesara/gpuarray/neighbours.py @@ -1,9 +1,9 @@ -import theano.tensor as tt -from 
theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp -from theano.graph.params_type import ParamsType -from theano.tensor.nnet.neighbours import Images2Neibs -from theano.tensor.type import integer_dtypes +import aesara.tensor as tt +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp +from aesara.graph.params_type import ParamsType +from aesara.tensor.nnet.neighbours import Images2Neibs +from aesara.tensor.type import integer_dtypes try: @@ -11,13 +11,13 @@ except ImportError: pass -from theano.gpuarray.basic_ops import ( +from aesara.gpuarray.basic_ops import ( GpuKernelBaseCOp, Kernel, as_gpuarray_variable, infer_context_name, ) -from theano.gpuarray.type import GpuArrayType, gpu_context_type +from aesara.gpuarray.type import GpuArrayType, gpu_context_type class GpuImages2Neibs(GpuKernelBaseCOp, Images2Neibs, _NoPythonOp): diff --git a/theano/gpuarray/nnet.py b/aesara/gpuarray/nnet.py similarity index 99% rename from theano/gpuarray/nnet.py rename to aesara/gpuarray/nnet.py index 5d78a4c0ee..17bbaf9535 100644 --- a/theano/gpuarray/nnet.py +++ b/aesara/gpuarray/nnet.py @@ -2,8 +2,8 @@ import numpy as np -from theano.graph.basic import Apply -from theano.graph.op import _NoPythonOp +from aesara.graph.basic import Apply +from aesara.graph.op import _NoPythonOp try: @@ -12,15 +12,15 @@ except ImportError: pass -from theano.gpuarray.basic_ops import ( +from aesara.gpuarray.basic_ops import ( GpuKernelBaseCOp, Kernel, as_gpuarray_variable, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.fp16_help import load_w, work_dtype, write_w -from theano.gpuarray.type import GpuArrayType +from aesara.gpuarray.fp16_help import load_w, work_dtype, write_w +from aesara.gpuarray.type import GpuArrayType class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBaseCOp, _NoPythonOp): @@ -240,9 +240,9 @@ def c_code(self, node, nodename, inp, out, sub): "dimension mismatch in x,b arguments"); %(fail)s; } - if (theano_prep_output(&%(nll)s, 1, PyGpuArray_DIMS(%(y_idx)s), %(x)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s - if (theano_prep_output(&%(sm)s, 2, PyGpuArray_DIMS(%(x)s), %(x)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s - if (theano_prep_output(&%(am)s, 1, PyGpuArray_DIMS(%(y_idx)s), %(y_idx)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s + if (aesara_prep_output(&%(nll)s, 1, PyGpuArray_DIMS(%(y_idx)s), %(x)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s + if (aesara_prep_output(&%(sm)s, 2, PyGpuArray_DIMS(%(x)s), %(x)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s + if (aesara_prep_output(&%(am)s, 1, PyGpuArray_DIMS(%(y_idx)s), %(y_idx)s->ga.typecode, GA_C_ORDER, %(ctx)s)) %(fail)s { size_t n_blocks = std::min(PyGpuArray_DIM(%(x)s, 0), (size_t)4096); size_t n_threads = std::min(PyGpuArray_DIM(%(x)s, 1), (size_t)256); diff --git a/theano/gpuarray/opt.py b/aesara/gpuarray/opt.py similarity index 94% rename from theano/gpuarray/opt.py rename to aesara/gpuarray/opt.py index 086244bfa2..92595de04e 100644 --- a/theano/gpuarray/opt.py +++ b/aesara/gpuarray/opt.py @@ -7,17 +7,17 @@ import numpy as np -import theano -import theano.tensor.nlinalg as nlinalg -import theano.tensor.signal.pool as pool -import theano.tensor.slinalg as slinalg -from theano import scalar as ts -from theano import tensor as tt -from theano.assert_op import Assert -from theano.breakpoint import PdbBreakpoint -from theano.compile import optdb -from theano.configdefaults import config -from theano.gpuarray.basic_ops import ( +import aesara +import aesara.tensor.nlinalg as 
nlinalg +import aesara.tensor.signal.pool as pool +import aesara.tensor.slinalg as slinalg +from aesara import scalar as ts +from aesara import tensor as tt +from aesara.assert_op import Assert +from aesara.breakpoint import PdbBreakpoint +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.gpuarray.basic_ops import ( GpuAlloc, GpuAllocEmpty, GpuContiguous, @@ -35,7 +35,7 @@ host_from_gpu, infer_context_name, ) -from theano.gpuarray.blas import ( +from aesara.gpuarray.blas import ( GpuCorr3dMM, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradWeights, @@ -52,7 +52,7 @@ gpugemv_inplace, gpugemv_no_inplace, ) -from theano.gpuarray.blocksparse import ( +from aesara.gpuarray.blocksparse import ( GpuSparseBlockGemv, GpuSparseBlockOuter, gpu_sparse_block_gemv, @@ -60,15 +60,15 @@ gpu_sparse_block_outer, gpu_sparse_block_outer_inplace, ) -from theano.gpuarray.ctc import GpuConnectionistTemporalClassification -from theano.gpuarray.dnn_opt import ( +from aesara.gpuarray.ctc import GpuConnectionistTemporalClassification +from aesara.gpuarray.dnn_opt import ( local_abstractconv3d_cudnn_alt, local_abstractconv_cudnn, local_abstractconv_cudnn_alt, local_abstractconv_gi_cudnn, local_abstractconv_gw_cudnn, ) -from theano.gpuarray.elemwise import ( +from aesara.gpuarray.elemwise import ( GpuCAReduceCPY, GpuCAReduceCuda, GpuDimShuffle, @@ -77,7 +77,7 @@ gpu_erfinv, max_inputs_to_GpuElemwise, ) -from theano.gpuarray.linalg import ( +from aesara.gpuarray.linalg import ( MATRIX_STRUCTURES_SOLVE, GpuCholesky, GpuCublasTriangularSolve, @@ -90,14 +90,14 @@ gpu_qr, gpu_svd, ) -from theano.gpuarray.neighbours import GpuImages2Neibs -from theano.gpuarray.nnet import ( +from aesara.gpuarray.neighbours import GpuImages2Neibs +from aesara.gpuarray.nnet import ( gpu_crossentropy_softmax_1hot_with_bias_dx, gpu_crossentropy_softmax_argmax_1hot_with_bias, gpu_softmax, gpu_softmax_with_bias, ) -from theano.gpuarray.opt_util import ( +from aesara.gpuarray.opt_util import ( alpha_merge, op_lifter, output_merge, @@ -106,7 +106,7 @@ safe_to_gpu, unpad_dims, ) -from theano.gpuarray.optdb import ( +from aesara.gpuarray.optdb import ( GraphToGPUDB, abstract_batch_norm_db, abstract_batch_norm_db2, @@ -123,15 +123,15 @@ register_opt, register_opt2, ) -from theano.gpuarray.pool import ( +from aesara.gpuarray.pool import ( GpuAveragePoolGrad, GpuDownsampleFactorMaxGradGrad, GpuMaxPoolGrad, GpuMaxPoolRop, GpuPool, ) -from theano.gpuarray.reduction import GpuMaxAndArgmax -from theano.gpuarray.subtensor import ( +from aesara.gpuarray.reduction import GpuMaxAndArgmax +from aesara.gpuarray.subtensor import ( GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, @@ -142,31 +142,31 @@ GpuIncSubtensor, GpuSubtensor, ) -from theano.gpuarray.type import ( +from aesara.gpuarray.type import ( ContextNotDefined, GpuArrayConstant, GpuArrayType, get_context, move_to_gpu, ) -from theano.graph import toolbox -from theano.graph.basic import Constant, Variable, applys_between, clone_replace -from theano.graph.fg import FunctionGraph -from theano.graph.opt import ( +from aesara.graph import toolbox +from aesara.graph.basic import Constant, Variable, applys_between, clone_replace +from aesara.graph.fg import FunctionGraph +from aesara.graph.opt import ( GlobalOptimizer, LocalMetaOptimizer, copy_stack_trace, inherit_stack_trace, local_optimizer, ) -from theano.ifelse import IfElse -from theano.link.c.basic import CLinker -from theano.misc.ordered_set import OrderedSet -from theano.scalar.basic import 
Cast, Pow, Scalar, log, neg, true_div -from theano.scalar.basic_scipy import Erfcinv, Erfinv -from theano.scan.op import Scan -from theano.scan.opt import ScanInplaceOptimizer -from theano.tensor.basic import ( +from aesara.ifelse import IfElse +from aesara.link.c.basic import CLinker +from aesara.misc.ordered_set import OrderedSet +from aesara.scalar.basic import Cast, Pow, Scalar, log, neg, true_div +from aesara.scalar.basic_scipy import Erfcinv, Erfinv +from aesara.scan.op import Scan +from aesara.scan.opt import ScanInplaceOptimizer +from aesara.tensor.basic import ( Alloc, AllocDiag, AllocEmpty, @@ -178,9 +178,9 @@ Split, Tri, ) -from theano.tensor.math import MaxAndArgmax -from theano.tensor.nnet import batchnorm, conv3d2d -from theano.tensor.nnet.abstract_conv import ( +from aesara.tensor.math import MaxAndArgmax +from aesara.tensor.nnet import batchnorm, conv3d2d +from aesara.tensor.nnet.abstract_conv import ( AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights, @@ -190,15 +190,15 @@ BaseAbstractConv, get_conv_output_shape, ) -from theano.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter -from theano.tensor.nnet.conv import ConvOp -from theano.tensor.nnet.ctc import ConnectionistTemporalClassification -from theano.tensor.nnet.neighbours import Images2Neibs -from theano.tensor.shape import Reshape, Shape, SpecifyShape, shape_i, specify_shape -from theano.tensor.type import TensorType +from aesara.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter +from aesara.tensor.nnet.conv import ConvOp +from aesara.tensor.nnet.ctc import ConnectionistTemporalClassification +from aesara.tensor.nnet.neighbours import Images2Neibs +from aesara.tensor.shape import Reshape, Shape, SpecifyShape, shape_i, specify_shape +from aesara.tensor.type import TensorType -_logger = logging.getLogger("theano.gpuarray.opt") +_logger = logging.getLogger("aesara.gpuarray.opt") gpu_seqopt.register( @@ -223,12 +223,12 @@ "gpuarray_cut_transfers", gpu_cut_copies, 2, "fast_compile", "fast_run", "gpuarray" ) -register_opt("fast_compile")(theano.tensor.basic_opt.local_track_shape_i) +register_opt("fast_compile")(aesara.tensor.basic_opt.local_track_shape_i) register_opt(final_opt=True, name="gpua_constant_folding")( - theano.tensor.basic_opt.constant_folding + aesara.tensor.basic_opt.constant_folding ) gpu_optimizer.register( - "local_remove_all_assert", theano.tensor.basic_opt.local_remove_all_assert, "unsafe" + "local_remove_all_assert", aesara.tensor.basic_opt.local_remove_all_assert, "unsafe" ) @@ -402,7 +402,7 @@ def apply(self, fgraph): break outputs = [] - if isinstance(new_ops, theano.graph.op.Op): + if isinstance(new_ops, aesara.graph.op.Op): with inherit_stack_trace(node.outputs): outputs = new_ops( *[mapping[i] for i in node.inputs], return_list=True @@ -432,7 +432,7 @@ def apply(self, fgraph): ): _logger.warning( f"The optimization {lopt} returned bad dtype. Skipping it." - " Write to theano-dev mailing list about this." + " Write to aesara-dev mailing list about this." 
) newnode = node.clone_with_new_inputs( [mapping.get(i) for i in node.inputs] @@ -614,7 +614,7 @@ def local_cut_gpu_transfers(fgraph, node): ) gpu_cut_copies.register( "cut_gpua_constant_transfers", - theano.tensor.basic_opt.constant_folding, + aesara.tensor.basic_opt.constant_folding, "fast_compile", "fast_run", "gpuarray", @@ -700,7 +700,7 @@ def local_gpua_alloc_empty_to_zeros(fgraph, node): optdb.register( "local_gpua_alloc_empty_to_zeros", - theano.graph.opt.in2out(local_gpua_alloc_empty_to_zeros), + aesara.graph.opt.in2out(local_gpua_alloc_empty_to_zeros), # After move to gpu and merge2, before inplace. 49.3, "alloc_empty_to_zeros", @@ -721,8 +721,8 @@ def local_gpu_contiguous_gpu_contiguous(fgraph, node): @register_opt("fast_compile") -@op_lifter([theano.tensor.extra_ops.CpuContiguous]) -@register_opt2([theano.tensor.extra_ops.CpuContiguous], "fast_compile") +@op_lifter([aesara.tensor.extra_ops.CpuContiguous]) +@register_opt2([aesara.tensor.extra_ops.CpuContiguous], "fast_compile") def local_gpua_contiguous(fgraph, op, context_name, inputs, outputs): return gpu_contiguous @@ -756,8 +756,8 @@ def local_gpua_flatten(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.elemwise.Elemwise]) -@register_opt2([theano.tensor.elemwise.Elemwise], "fast_compile") +@op_lifter([aesara.tensor.elemwise.Elemwise]) +@register_opt2([aesara.tensor.elemwise.Elemwise], "fast_compile") def local_gpua_elemwise(fgraph, op, context_name, inputs, outputs): scal_op = op.scalar_op name = op.name @@ -837,12 +837,12 @@ def split_inputs(inputs, max_nb_inputs, op): Parameters ---------- - inputs: List of theano variables. + inputs: List of aesara variables. List of inputs to node. max_nb_inputs: int Maximum number of inputs the node can handle without compilation fail. - op : Theano operator instance. + op : Aesara operator instance. Operator that should be used to rebuild the computation graph with smaller number of inputs per node. 
""" @@ -861,7 +861,7 @@ def split_inputs(inputs, max_nb_inputs, op): return op(*inputs) -gpu_local_elemwise_fusion = theano.tensor.basic_opt.local_elemwise_fusion_op( +gpu_local_elemwise_fusion = aesara.tensor.basic_opt.local_elemwise_fusion_op( GpuElemwise, max_inputs_to_GpuElemwise ) optdb.register( @@ -870,7 +870,7 @@ def split_inputs(inputs, max_nb_inputs, op): # 48.6 specialize # 49 cpu fusion # 49.5 add destroy handler - theano.tensor.basic_opt.FusionOptimizer(gpu_local_elemwise_fusion), + aesara.tensor.basic_opt.FusionOptimizer(gpu_local_elemwise_fusion), 49, "fast_run", "fusion", @@ -878,7 +878,7 @@ def split_inputs(inputs, max_nb_inputs, op): "gpuarray", ) -inplace_gpu_elemwise_opt = theano.tensor.basic_opt.InplaceElemwiseOptimizer(GpuElemwise) +inplace_gpu_elemwise_opt = aesara.tensor.basic_opt.InplaceElemwiseOptimizer(GpuElemwise) optdb.register( "gpua_inplace_opt", inplace_gpu_elemwise_opt, @@ -889,12 +889,12 @@ def split_inputs(inputs, max_nb_inputs, op): "gpuarray", ) -register_opt(theano.tensor.basic_opt.local_useless_elemwise) +register_opt(aesara.tensor.basic_opt.local_useless_elemwise) @register_opt("fast_compile") -@op_lifter([theano.tensor.elemwise.DimShuffle]) -@register_opt2([theano.tensor.elemwise.DimShuffle], "fast_compile") +@op_lifter([aesara.tensor.elemwise.DimShuffle]) +@register_opt2([aesara.tensor.elemwise.DimShuffle], "fast_compile") def local_gpua_dimshuffle(fgraph, op, context_name, inputs, outputs): return GpuDimShuffle(op.input_broadcastable, op.new_order) @@ -935,8 +935,8 @@ def gpu_print_wrapper(op, cnda): @register_opt("fast_compile") -@op_lifter([theano.printing.Print]) -@register_opt2([theano.printing.Print], "fast_compile") +@op_lifter([aesara.printing.Print]) +@register_opt2([aesara.printing.Print], "fast_compile") def local_gpua_print_op(fgraph, op, context_name, inputs, outputs): (x,) = inputs with inherit_stack_trace(outputs): @@ -1065,7 +1065,7 @@ def local_gpua_split(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.subtensor.Subtensor]) +@op_lifter([aesara.tensor.subtensor.Subtensor]) def local_gpua_subtensor(fgraph, op, context_name, inputs, outputs): x = inputs[0] if x.owner and isinstance(x.owner.op, HostFromGpu): @@ -1097,7 +1097,7 @@ def local_gpua_subtensor(fgraph, op, context_name, inputs, outputs): return GpuSubtensor(op.idx_list) -@register_opt2([theano.tensor.subtensor.Subtensor], "fast_compile") +@register_opt2([aesara.tensor.subtensor.Subtensor], "fast_compile") def local_gpua_subtensor_graph(fgraph, op, context_name, inputs, outputs): # We need different code as the condition is different as inputs # aren't the same. 
@@ -1122,8 +1122,8 @@ def local_gpua_subtensor_graph(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.subtensor.IncSubtensor]) -@register_opt2([theano.tensor.subtensor.IncSubtensor], "fast_compile") +@op_lifter([aesara.tensor.subtensor.IncSubtensor]) +@register_opt2([aesara.tensor.subtensor.IncSubtensor], "fast_compile") def local_gpua_inc_subtensor(fgraph, op, context_name, inputs, outputs): op = GpuIncSubtensor( op.idx_list, @@ -1138,22 +1138,22 @@ def local_gpua_inc_subtensor(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.subtensor.AdvancedSubtensor1]) -@register_opt2([theano.tensor.subtensor.AdvancedSubtensor1], "fast_compile") +@op_lifter([aesara.tensor.subtensor.AdvancedSubtensor1]) +@register_opt2([aesara.tensor.subtensor.AdvancedSubtensor1], "fast_compile") def local_gpua_advanced_subtensor1(fgraph, op, context_name, inputs, outputs): return GpuAdvancedSubtensor1() @register_opt("fast_compile") -@op_lifter([theano.tensor.subtensor.AdvancedSubtensor]) -@register_opt2([theano.tensor.subtensor.AdvancedSubtensor], "fast_compile") +@op_lifter([aesara.tensor.subtensor.AdvancedSubtensor]) +@register_opt2([aesara.tensor.subtensor.AdvancedSubtensor], "fast_compile") def local_gpua_advanced_subtensor(fgraph, op, context_name, inputs, outputs): return GpuAdvancedSubtensor() @register_opt("fast_compile") -@op_lifter([theano.tensor.subtensor.AdvancedIncSubtensor1]) -@register_opt2([theano.tensor.subtensor.AdvancedIncSubtensor1], "fast_compile") +@op_lifter([aesara.tensor.subtensor.AdvancedIncSubtensor1]) +@register_opt2([aesara.tensor.subtensor.AdvancedIncSubtensor1], "fast_compile") def local_gpua_advanced_incsubtensor1(fgraph, op, context_name, inputs, outputs): x, y, ilist = inputs @@ -1186,8 +1186,8 @@ def local_gpua_advanced_incsubtensor1(fgraph, op, context_name, inputs, outputs) # Do not register this optimization for now, as it slows down the # execution by a lot in important cases. 
# @register_opt('fast_compile') -# @op_lifter([theano.tensor.subtensor.AdvancedIncSubtensor]) -# @register_opt2([theano.tensor.subtensor.AdvancedIncSubtensor], 'fast_compile') +# @op_lifter([aesara.tensor.subtensor.AdvancedIncSubtensor]) +# @register_opt2([aesara.tensor.subtensor.AdvancedIncSubtensor], 'fast_compile') def local_gpua_advanced_incsubtensor(fgraph, op, context_name, inputs, outputs): if not op.set_instead_of_inc: return GpuAdvancedIncSubtensor() @@ -1227,16 +1227,16 @@ def local_gpu_extract_diag(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") @op_lifter( [ - theano.tensor.elemwise.CAReduce, - theano.tensor.math.Sum, - theano.tensor.math.Prod, + aesara.tensor.elemwise.CAReduce, + aesara.tensor.math.Sum, + aesara.tensor.math.Prod, ] ) @register_opt2( [ - theano.tensor.elemwise.CAReduce, - theano.tensor.math.Sum, - theano.tensor.math.Prod, + aesara.tensor.elemwise.CAReduce, + aesara.tensor.math.Sum, + aesara.tensor.math.Prod, ], "fast_compile", ) @@ -1334,8 +1334,8 @@ def local_gpua_careduce(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.blas.Gemv, theano.tensor.blas_c.CGemv]) -@register_opt2([theano.tensor.blas.Gemv], "fast_compile") +@op_lifter([aesara.tensor.blas.Gemv, aesara.tensor.blas_c.CGemv]) +@register_opt2([aesara.tensor.blas.Gemv], "fast_compile") def local_gpua_gemv(fgraph, op, context_name, inputs, outputs): if inputs[0].dtype == "float16": # Use gemm implementation as cublas gemv don't support float16 @@ -1352,8 +1352,8 @@ def local_gpua_gemv(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.blas.Gemm]) -@register_opt2([theano.tensor.blas.Gemm], "fast_compile") +@op_lifter([aesara.tensor.blas.Gemm]) +@register_opt2([aesara.tensor.blas.Gemm], "fast_compile") def local_gpua_gemm(fgraph, op, context_name, inputs, outputs): if inputs[0].dtype not in ["float16", "float32", "float64"]: return @@ -1364,8 +1364,8 @@ def local_gpua_gemm(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.blas.BatchedDot]) -@register_opt2([theano.tensor.blas.BatchedDot], "fast_compile") +@op_lifter([aesara.tensor.blas.BatchedDot]) +@register_opt2([aesara.tensor.blas.BatchedDot], "fast_compile") def local_gpua_gemmbatch(fgraph, op, context_name, inputs, outputs): if inputs[0].dtype not in ["float16", "float32", "float64"]: return @@ -1426,16 +1426,16 @@ def local_gpua_gemmbatch_output_merge(fgraph, node, *inputs): @register_opt("fast_compile") @op_lifter( [ - theano.tensor.blas.Ger, - theano.tensor.blas_c.CGer, - theano.tensor.blas_scipy.ScipyGer, + aesara.tensor.blas.Ger, + aesara.tensor.blas_c.CGer, + aesara.tensor.blas_scipy.ScipyGer, ] ) @register_opt2( [ - theano.tensor.blas.Ger, - theano.tensor.blas_c.CGer, - theano.tensor.blas_scipy.ScipyGer, + aesara.tensor.blas.Ger, + aesara.tensor.blas_c.CGer, + aesara.tensor.blas_scipy.ScipyGer, ], "fast_compile", ) @@ -1446,15 +1446,15 @@ def local_gpua_ger(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.blas.Dot22]) -@register_opt2([theano.tensor.blas.Dot22], "fast_compile") +@op_lifter([aesara.tensor.blas.Dot22]) +@register_opt2([aesara.tensor.blas.Dot22], "fast_compile") def local_gpua_dot22(fgraph, op, context_name, inputs, outputs): return gpu_dot22 @register_opt("fast_compile") -@op_lifter([theano.tensor.blas.Dot22Scalar]) -@register_opt2([theano.tensor.blas.Dot22Scalar], "fast_compile") 
+@op_lifter([aesara.tensor.blas.Dot22Scalar]) +@register_opt2([aesara.tensor.blas.Dot22Scalar], "fast_compile") def local_gpua_dot22scalar(fgraph, op, context_name, inputs, outputs): with inherit_stack_trace(outputs): x, y, a = inputs @@ -1479,9 +1479,9 @@ def local_gpua_tri(fgraph, op, context_name, inputs, outputs): @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.CrossentropySoftmaxArgmax1HotWithBias]) +@op_lifter([aesara.tensor.nnet.basic.CrossentropySoftmaxArgmax1HotWithBias]) @register_opt2( - [theano.tensor.nnet.basic.CrossentropySoftmaxArgmax1HotWithBias], "fast_compile" + [aesara.tensor.nnet.basic.CrossentropySoftmaxArgmax1HotWithBias], "fast_compile" ) def local_gpua_crossentropysoftmaxargmax1hotwithbias( fgraph, op, context_name, inputs, outputs @@ -1490,9 +1490,9 @@ def local_gpua_crossentropysoftmaxargmax1hotwithbias( @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.CrossentropySoftmax1HotWithBiasDx]) +@op_lifter([aesara.tensor.nnet.basic.CrossentropySoftmax1HotWithBiasDx]) @register_opt2( - [theano.tensor.nnet.basic.CrossentropySoftmax1HotWithBiasDx], "fast_compile" + [aesara.tensor.nnet.basic.CrossentropySoftmax1HotWithBiasDx], "fast_compile" ) def local_gpua_crossentropysoftmax1hotwithbiasdx( fgraph, op, context_name, inputs, outputs @@ -1501,22 +1501,22 @@ def local_gpua_crossentropysoftmax1hotwithbiasdx( @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.Softmax]) -@register_opt2([theano.tensor.nnet.basic.Softmax], "fast_compile") +@op_lifter([aesara.tensor.nnet.basic.Softmax]) +@register_opt2([aesara.tensor.nnet.basic.Softmax], "fast_compile") def local_gpua_softmax(fgraph, op, context_name, inputs, outputs): return gpu_softmax @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.SoftmaxWithBias]) -@register_opt2([theano.tensor.nnet.basic.SoftmaxWithBias], "fast_compile") +@op_lifter([aesara.tensor.nnet.basic.SoftmaxWithBias]) +@register_opt2([aesara.tensor.nnet.basic.SoftmaxWithBias], "fast_compile") def local_gpua_softmaxwithbias(fgraph, op, context_name, inputs, outputs): return gpu_softmax_with_bias @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.CrossentropyCategorical1Hot]) -@register_opt2([theano.tensor.nnet.basic.CrossentropyCategorical1Hot], "fast_compile") +@op_lifter([aesara.tensor.nnet.basic.CrossentropyCategorical1Hot]) +@register_opt2([aesara.tensor.nnet.basic.CrossentropyCategorical1Hot], "fast_compile") def local_gpu_crossentropycategorical1hot(fgraph, op, context_name, inputs, outputs): # There is no corresponding GPU Op, but we can express it as: # coding, one_of_n = inputs @@ -1527,9 +1527,9 @@ def local_gpu_crossentropycategorical1hot(fgraph, op, context_name, inputs, outp @register_opt("fast_compile") -@op_lifter([theano.tensor.nnet.basic.CrossentropyCategorical1HotGrad]) +@op_lifter([aesara.tensor.nnet.basic.CrossentropyCategorical1HotGrad]) @register_opt2( - [theano.tensor.nnet.basic.CrossentropyCategorical1HotGrad], "fast_compile" + [aesara.tensor.nnet.basic.CrossentropyCategorical1HotGrad], "fast_compile" ) def local_gpu_crossentropycategorical1hotgrad( fgraph, op, context_name, inputs, outputs @@ -1545,7 +1545,7 @@ def local_gpu_crossentropycategorical1hotgrad( as_gpuarray_variable(np.zeros((), dtype=coding.dtype), context_name), *[shape_i(coding, i) for i in range(coding.ndim)], ) - gcoding = theano.tensor.subtensor.set_subtensor( + gcoding = aesara.tensor.subtensor.set_subtensor( z[idx0, one_of_n], gpu_neg(gpu_true_div(gy, coding[idx0, one_of_n])) ) 
return [gcoding.transfer(context_name)] @@ -1573,7 +1573,7 @@ def local_gpua_error_convop(fgraph, op, context_name, inputs, outputs): ConvOp does not work with the gpuarray backend. Use the new convolution interface to have GPU convolution working: -theano.tensor.nnet.conv2d() +aesara.tensor.nnet.conv2d() """ ) @@ -2266,7 +2266,7 @@ def provide_inputs(self, node, inputs): if type(node.op) in [AbstractConv2d, AbstractConv3d]: img, kern = node.inputs for (var, shape) in zip((img, kern), shapes): - result[var] = theano.shared( + result[var] = aesara.shared( np.random.random(shape).astype(var.dtype), var.name, broadcastable=var.broadcastable, @@ -2288,7 +2288,7 @@ def provide_inputs(self, node, inputs): result[kshape] = tt.as_tensor_variable(node.op.kshp[-convdim:]) for (var, shape) in zip((img, top), (node.op.imshp, tshp)): - result[var] = theano.shared( + result[var] = aesara.shared( np.random.random(shape).astype(var.dtype), var.name, broadcastable=var.broadcastable, @@ -2309,7 +2309,7 @@ def provide_inputs(self, node, inputs): result[ishape] = tt.as_tensor_variable(node.op.imshp[2:]) for (var, shape) in zip((kern, top), (node.op.kshp, tshp)): - result[var] = theano.shared( + result[var] = aesara.shared( np.random.random(shape).astype(var.dtype), var.name, broadcastable=var.broadcastable, @@ -2602,7 +2602,7 @@ def local_assert_no_cpu_op(fgraph, node): # Register the local_assert_no_cpu_op: -assert_no_cpu_op = theano.graph.opt.in2out( +assert_no_cpu_op = aesara.graph.opt.in2out( local_assert_no_cpu_op, name="assert_no_cpu_op" ) # 49.2 is after device specialization & fusion optimizations for last transfers @@ -2746,7 +2746,7 @@ def local_gpua_images2neibs(fgraph, op, context_name, inputs, outputs): # solve @register_opt("fast_compile") @op_lifter([slinalg.Solve]) -@register_opt2([theano.tensor.slinalg.Solve], "fast_compile") +@register_opt2([aesara.tensor.slinalg.Solve], "fast_compile") def local_gpu_solve(fgraph, op, context_name, inputs, outputs): if inputs[0].dtype not in ["float16", "float32", "float64"]: return @@ -2798,7 +2798,7 @@ def local_gpu_cholesky(fgraph, op, context_name, inputs, outputs): # For Cholesky decomposition, magma 2.2 is slower than cusolver 8 (tested for # matrices of size 1000). Thus, cusolver is prioritized during graph # optimizations. To explicitly use magma, you should disable cusolver using -# `optimizer_excluding=cusolver` in Theano config. +# `optimizer_excluding=cusolver` in Aesara config. 
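A small, hedged example of the configuration switch mentioned in the comment above. `optimizer_excluding` is the standard Aesara/Theano config option; the environment-variable route assumes `AESARA_FLAGS` is set before `aesara` is first imported, and an actual GPU plus MAGMA build is required for the lift to take effect.

```python
# Hypothetical sketch: prefer MAGMA's Cholesky by excluding cusolver-tagged
# rewrites. AESARA_FLAGS must be set before the first `import aesara`.
import os
os.environ["AESARA_FLAGS"] = "device=cuda,optimizer_excluding=cusolver"

import aesara
import aesara.tensor as tt
from aesara.tensor.slinalg import cholesky

A = tt.matrix("A", dtype="float32")
f = aesara.function([A], cholesky(A))  # optimization may now pick the MAGMA GPU Op
```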
lifter = op_lifter([slinalg.Cholesky])(local_gpu_cholesky) matrix_ops_db.register( "local_gpu_cholesky", @@ -2964,7 +2964,7 @@ def local_gpu_magma_svd(fgraph, op, context_name, inputs, outputs): @register_opt("ctc", "fast_compile") -@op_lifter([theano.tensor.nnet.ctc.ConnectionistTemporalClassification]) +@op_lifter([aesara.tensor.nnet.ctc.ConnectionistTemporalClassification]) @register_opt2([ConnectionistTemporalClassification], "ctc", "fast_compile") def local_gpu_ctc(fgraph, op, context_name, inputs, outputs): op = GpuConnectionistTemporalClassification(compute_grad=op.compute_grad) @@ -3110,7 +3110,7 @@ def local_gpu_ctc(fgraph, op, context_name, inputs, outputs): # We import these opts here instead of at the top of this file # to avoid a circular dependency problem with dnn -from theano.gpuarray.dnn import ( # noqa: E402 +from aesara.gpuarray.dnn import ( # noqa: E402 local_abstract_batch_norm_inference_cudnn, local_abstract_batch_norm_train_cudnn, local_abstract_batch_norm_train_grad_cudnn, @@ -3171,7 +3171,7 @@ def local_gpu_ctc(fgraph, op, context_name, inputs, outputs): # cpu is a normal optimization. We can't register it in # GraphToGPU. So for now, only add it to the slower EQ phase. If # there is no cuDNN, we still want to move it to the GPU now with - # a Theano graph so to have this graph on the GPU. + # an Aesara graph so to have this graph on the GPU. abstract_batch_norm_db.register( cpu.__name__, cpu, "gpuarray", "fast_compile", "fast_run", position="last" ) diff --git a/theano/gpuarray/opt_util.py b/aesara/gpuarray/opt_util.py similarity index 96% rename from theano/gpuarray/opt_util.py rename to aesara/gpuarray/opt_util.py index e64a6995b0..04f5babe9f 100644 --- a/theano/gpuarray/opt_util.py +++ b/aesara/gpuarray/opt_util.py @@ -2,25 +2,25 @@ import numpy as np -from theano import scalar as ts -from theano.gpuarray.basic_ops import ( +from aesara import scalar as ts +from aesara.gpuarray.basic_ops import ( GpuAllocEmpty, GpuFromHost, GpuReshape, HostFromGpu, host_from_gpu, ) -from theano.gpuarray.elemwise import GpuDimShuffle, GpuElemwise -from theano.gpuarray.type import GpuArrayType, get_context, move_to_gpu -from theano.graph.basic import Constant -from theano.graph.op import Op -from theano.graph.opt import copy_stack_trace, inherit_stack_trace, local_optimizer -from theano.tensor.basic import as_tensor, cast, get_scalar_constant_value, join -from theano.tensor.elemwise import DimShuffle -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import prod -from theano.tensor.shape import shape_padright -from theano.tensor.type import TensorType +from aesara.gpuarray.elemwise import GpuDimShuffle, GpuElemwise +from aesara.gpuarray.type import GpuArrayType, get_context, move_to_gpu +from aesara.graph.basic import Constant +from aesara.graph.op import Op +from aesara.graph.opt import copy_stack_trace, inherit_stack_trace, local_optimizer +from aesara.tensor.basic import as_tensor, cast, get_scalar_constant_value, join +from aesara.tensor.elemwise import DimShuffle +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import prod +from aesara.tensor.shape import shape_padright +from aesara.tensor.type import TensorType # Define a few operations to use in optimizations, @@ -49,7 +49,7 @@ def grab_cpu_scalar(v, nd): Parameters ---------- v - Theano variable to extract the constant value from. + Aesara variable to extract the constant value from. 
nd : int Expected number of dimensions for the variable (for broadcasted constants). diff --git a/theano/gpuarray/optdb.py b/aesara/gpuarray/optdb.py similarity index 93% rename from theano/gpuarray/optdb.py rename to aesara/gpuarray/optdb.py index 67ae9b3460..5401dcd42e 100644 --- a/theano/gpuarray/optdb.py +++ b/aesara/gpuarray/optdb.py @@ -1,6 +1,6 @@ -from theano.compile import optdb -from theano.graph.opt import GraphToGPULocalOptGroup, TopoOptimizer, local_optimizer -from theano.graph.optdb import DB, EquilibriumDB, LocalGroupDB, SequenceDB +from aesara.compile import optdb +from aesara.graph.opt import GraphToGPULocalOptGroup, TopoOptimizer, local_optimizer +from aesara.graph.optdb import DB, EquilibriumDB, LocalGroupDB, SequenceDB gpu_optimizer = EquilibriumDB() @@ -105,7 +105,7 @@ class GraphToGPUDB(DB): """ def query(self, *tags, **kwtags): - from theano.gpuarray.opt import GraphToGPU + from aesara.gpuarray.opt import GraphToGPU opt = gpu_optimizer2.query(*tags, **kwtags) return GraphToGPU(opt.local_optimizers_all, opt.local_optimizers_map) diff --git a/theano/gpuarray/pathparse.py b/aesara/gpuarray/pathparse.py similarity index 86% rename from theano/gpuarray/pathparse.py rename to aesara/gpuarray/pathparse.py index 31bf7e0c23..89ec6fbf30 100644 --- a/theano/gpuarray/pathparse.py +++ b/aesara/gpuarray/pathparse.py @@ -5,20 +5,20 @@ class PathParser: """ Class that allows to modify system's PATH environment variable - at runtime. Currently used in ``theano.gpuarray.dnn`` module + at runtime. Currently used in ``aesara.gpuarray.dnn`` module on Windows only. **Examples**: ..code-block:: python - theano.pathparse.PathParser(pathToAdd1, pathToAdd2, ...) + aesara.pathparse.PathParser(pathToAdd1, pathToAdd2, ...) # PATH is then automatically updated for this execution. ..code-block:: python - paths = theano.pathparse.PathParser() + paths = aesara.pathparse.PathParser() paths.add(path1) paths.add(path2) # PATH is updated after each call to ``add()``. 
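For completeness, a usage sketch of the `PathParser` shown above, written with the module's full import path (the docstring's ``aesara.pathparse`` is shorthand for this module). The directory names are placeholders; in practice this is only relevant on Windows, e.g. to expose cuDNN DLLs.

```python
# Hypothetical paths; PathParser updates this process's PATH environment variable.
from aesara.gpuarray.pathparse import PathParser

paths = PathParser(r"C:\tools\cudnn\bin")  # PATH updated immediately
paths.add(r"C:\tools\other\bin")           # and again after each add()
```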
diff --git a/theano/gpuarray/pool.py b/aesara/gpuarray/pool.py similarity index 89% rename from theano/gpuarray/pool.py rename to aesara/gpuarray/pool.py index 11156a5942..beb8997b25 100644 --- a/theano/gpuarray/pool.py +++ b/aesara/gpuarray/pool.py @@ -1,24 +1,24 @@ -import theano -from theano.gpuarray.basic_ops import ( +import aesara +from aesara.gpuarray.basic_ops import ( CGpuKernelBase, as_gpuarray_variable, gpu_contiguous, gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.type import gpu_context_type -from theano.graph.basic import Apply -from theano.graph.params_type import ParamsType -from theano.scalar import bool as bool_t -from theano.tensor.basic import as_tensor_variable -from theano.tensor.signal.pool import Pool, PoolingMode_t -from theano.tensor.type import int_dtypes +from aesara.gpuarray.type import gpu_context_type +from aesara.graph.basic import Apply +from aesara.graph.params_type import ParamsType +from aesara.scalar import bool as bool_t +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.signal.pool import Pool, PoolingMode_t +from aesara.tensor.type import int_dtypes try: import pygpu except ImportError: - # To make sure theano is importable + # To make sure aesara is importable pass @@ -80,9 +80,9 @@ def make_node(self, inp, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") - ws = theano.tensor.cast(ws, "int64") - stride = theano.tensor.cast(stride, "int64") - pad = theano.tensor.cast(pad, "int64") + ws = aesara.tensor.cast(ws, "int64") + stride = aesara.tensor.cast(stride, "int64") + pad = aesara.tensor.cast(pad, "int64") return Apply(self, [inp, ws, stride, pad], [inp.type()]) @@ -99,7 +99,7 @@ def grad(self, inp, grads): grad = gpu_contiguous(grad) - disc = [theano.gradient.DisconnectedType()() for i in inp[1:]] + disc = [aesara.gradient.DisconnectedType()() for i in inp[1:]] if self.mode == "max": out = self(img, ws, stride, pad) g_out = GpuMaxPoolGrad(ndim=self.ndim, ignore_border=self.ignore_border)( @@ -189,9 +189,9 @@ def make_node(self, inp, out, out_grad, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") - ws = theano.tensor.cast(ws, "int64") - stride = theano.tensor.cast(stride, "int64") - pad = theano.tensor.cast(pad, "int64") + ws = aesara.tensor.cast(ws, "int64") + stride = aesara.tensor.cast(stride, "int64") + pad = aesara.tensor.cast(pad, "int64") return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()]) @@ -202,12 +202,12 @@ def grad(self, inp, grads): x, maxout, gz, ws, stride, pad = inp (ggx,) = grads return [ - theano.tensor.zeros_like(x), - theano.tensor.zeros_like(maxout), + aesara.tensor.zeros_like(x), + aesara.tensor.zeros_like(maxout), GpuDownsampleFactorMaxGradGrad( ndim=self.ndim, ignore_border=self.ignore_border )(x, maxout, ggx, ws, stride, pad), - ] + [theano.gradient.DisconnectedType()() for i in inp[3:]] + ] + [aesara.gradient.DisconnectedType()() for i in inp[3:]] def connection_pattern(self, node): return [[1], [1], [1], [0], [0], [0]] @@ -272,9 +272,9 @@ def make_node(self, inp, out_grad, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") - ws = theano.tensor.cast(ws, "int64") - stride = theano.tensor.cast(stride, "int64") - pad = theano.tensor.cast(pad, "int64") + ws = aesara.tensor.cast(ws, "int64") + stride = aesara.tensor.cast(stride, "int64") + pad = aesara.tensor.cast(pad, "int64") return 
Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()]) @@ -285,11 +285,11 @@ def grad(self, inp, grads): x, gz, ws, stride, pad = inp (ggx,) = grads return [ - theano.tensor.zeros_like(x), + aesara.tensor.zeros_like(x), GpuPool(ignore_border=self.ignore_border, ndim=self.ndim, mode=self.mode)( ggx, ws, stride, pad ), - ] + [theano.gradient.DisconnectedType()() for i in inp[2:]] + ] + [aesara.gradient.DisconnectedType()() for i in inp[2:]] def connection_pattern(self, node): return [[1], [1], [0], [0], [0]] @@ -348,9 +348,9 @@ def make_node(self, inp, out, out_grad, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") - ws = theano.tensor.cast(ws, "int64") - stride = theano.tensor.cast(stride, "int64") - pad = theano.tensor.cast(pad, "int64") + ws = aesara.tensor.cast(ws, "int64") + stride = aesara.tensor.cast(stride, "int64") + pad = aesara.tensor.cast(pad, "int64") return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()]) @@ -361,12 +361,12 @@ def grad(self, inp, grads): x, maxout, ggx, ws, stride, pad = inp (gz,) = grads return [ - theano.tensor.zeros_like(x), - theano.tensor.zeros_like(maxout), + aesara.tensor.zeros_like(x), + aesara.tensor.zeros_like(maxout), GpuMaxPoolGrad(ignore_border=self.ignore_border, ndim=self.ndim)( x, maxout, gz, ws, stride, pad ), - ] + [theano.gradient.DisconnectedType()() for i in inp[3:]] + ] + [aesara.gradient.DisconnectedType()() for i in inp[3:]] def connection_pattern(self, node): return [[1], [1], [1], [0], [0], [0]] @@ -431,9 +431,9 @@ def make_node(self, inp, eval_point, ws, stride=None, pad=None): if pad.dtype not in int_dtypes: raise TypeError("Padding parameters must be ints.") - ws = theano.tensor.cast(ws, "int64") - stride = theano.tensor.cast(stride, "int64") - pad = theano.tensor.cast(pad, "int64") + ws = aesara.tensor.cast(ws, "int64") + stride = aesara.tensor.cast(stride, "int64") + pad = aesara.tensor.cast(pad, "int64") return Apply(self, [inp, eval_point, ws, stride, pad], [eval_point.type()]) diff --git a/theano/gpuarray/reduction.py b/aesara/gpuarray/reduction.py similarity index 96% rename from theano/gpuarray/reduction.py rename to aesara/gpuarray/reduction.py index 628604ffa6..848fde17bc 100644 --- a/theano/gpuarray/reduction.py +++ b/aesara/gpuarray/reduction.py @@ -1,6 +1,6 @@ -from theano.graph.basic import Apply -from theano.graph.op import COp -from theano.graph.type import Generic +from aesara.graph.basic import Apply +from aesara.graph.op import COp +from aesara.graph.type import Generic from .basic_ops import as_gpuarray_variable, gpuarray_helper_inc_dir, infer_context_name from .type import GpuArrayType @@ -96,11 +96,11 @@ def c_code(self, node, name, input_names, output_names, sub): } } - if (theano_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) { + if (aesara_prep_output(&%(max)s, %(name)s_output_ndim, %(name)s_output_dims, %(max_typecode)s, GA_C_ORDER, %(X)s->context)) { PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare max output."); %(fail)s } - if (theano_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) { + if (aesara_prep_output(&%(argmax)s, %(name)s_output_ndim, %(name)s_output_dims, %(argmax_typecode)s, GA_C_ORDER, %(X)s->context)) { PyErr_SetString(PyExc_RuntimeError, "GpuMaxAndArgmax: unable to prepare argmax output."); %(fail)s } diff --git a/theano/gpuarray/rng_mrg.py 
b/aesara/gpuarray/rng_mrg.py similarity index 95% rename from theano/gpuarray/rng_mrg.py rename to aesara/gpuarray/rng_mrg.py index 460ed36802..fb5afe1977 100644 --- a/theano/gpuarray/rng_mrg.py +++ b/aesara/gpuarray/rng_mrg.py @@ -1,5 +1,5 @@ """ -GPU implementation of MRG31k3p random number generator for Theano. +GPU implementation of MRG31k3p random number generator for Aesara. Generator code in SSJ package (L'Ecuyer & Simard). http://www.iro.umontreal.ca/~simardr/ssj/indexe.html @@ -7,8 +7,8 @@ """ -from theano import tensor as tt -from theano.gpuarray.basic_ops import ( +from aesara import tensor as tt +from aesara.gpuarray.basic_ops import ( GpuFromHost, GpuKernelBase, Kernel, @@ -16,14 +16,14 @@ host_from_gpu, infer_context_name, ) -from theano.gpuarray.fp16_help import write_w -from theano.gpuarray.opt import register_opt, register_opt2 -from theano.gpuarray.type import GpuArrayType, gpu_context_type -from theano.graph.basic import Apply -from theano.graph.opt import local_optimizer -from theano.sandbox.rng_mrg import mrg_uniform, mrg_uniform_base -from theano.scalar import int32 as int_t -from theano.tensor import as_tensor_variable, get_vector_length +from aesara.gpuarray.fp16_help import write_w +from aesara.gpuarray.opt import register_opt, register_opt2 +from aesara.gpuarray.type import GpuArrayType, gpu_context_type +from aesara.graph.basic import Apply +from aesara.graph.opt import local_optimizer +from aesara.sandbox.rng_mrg import mrg_uniform, mrg_uniform_base +from aesara.scalar import int32 as int_t +from aesara.tensor import as_tensor_variable, get_vector_length class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): diff --git a/theano/gpuarray/sort.py b/aesara/gpuarray/sort.py similarity index 97% rename from theano/gpuarray/sort.py rename to aesara/gpuarray/sort.py index 4921cdf4c9..b610059a38 100644 --- a/theano/gpuarray/sort.py +++ b/aesara/gpuarray/sort.py @@ -3,10 +3,10 @@ import numpy as np -import theano -from theano.graph.basic import Apply -from theano.tensor import as_tensor_variable -from theano.tensor.sort import TopKOp +import aesara +from aesara.graph.basic import Apply +from aesara.tensor import as_tensor_variable +from aesara.tensor.sort import TopKOp from .basic_ops import ( GpuKernelBase, @@ -23,7 +23,7 @@ import pygpu import pygpu.gpuarray as ga except ImportError: - # To make sure theano is importable + # To make sure aesara is importable pass @@ -228,7 +228,7 @@ def c_code(self, node, nodename, inps, outs, sub): params_dv += "".join(f"dvstrides[{i}], " for i in reordered_axes) prep_output += ( """ - if (0 != theano_prep_output( + if (0 != aesara_prep_output( &%(yv)s, %(ndim)d, odims, %(inp_dtc)s, GA_C_ORDER, %(ctx)s)) { %(fail)s; @@ -244,7 +244,7 @@ def c_code(self, node, nodename, inps, outs, sub): params_di += "".join(f"distrides[{i}], " for i in reordered_axes) prep_output += ( """ - if (0 != theano_prep_output( + if (0 != aesara_prep_output( &%(yi)s, %(ndim)d, odims, %(out_dtc)s, GA_C_ORDER, %(ctx)s)) { %(fail)s; @@ -379,7 +379,7 @@ def __init__(self, axis): def __call__(self, val1, val2): v1 = np.sort(val1, axis=self.axis) v2 = np.sort(val2, axis=self.axis) - ret = theano.tensor.type.values_eq_approx(v1, v2) + ret = aesara.tensor.type.values_eq_approx(v1, v2) return ret diff --git a/theano/gpuarray/subtensor.py b/aesara/gpuarray/subtensor.py similarity index 98% rename from theano/gpuarray/subtensor.py rename to aesara/gpuarray/subtensor.py index 3ddfdfee6e..769450c3ea 100644 --- a/theano/gpuarray/subtensor.py +++ 
b/aesara/gpuarray/subtensor.py @@ -2,19 +2,19 @@ import numpy as np -import theano.tensor as tt -from theano.gradient import grad_not_implemented -from theano.graph.basic import Apply -from theano.graph.op import COp, Op -from theano.graph.params_type import ParamsType -from theano.graph.type import CType -from theano.link.c.interface import HideC -from theano.scalar import bool as bool_t -from theano.scalar import int32 as int_t -from theano.scalar import uint32 as size_t -from theano.tensor.basic import AllocDiag -from theano.tensor.math import clip, minimum -from theano.tensor.subtensor import ( +import aesara.tensor as tt +from aesara.gradient import grad_not_implemented +from aesara.graph.basic import Apply +from aesara.graph.op import COp, Op +from aesara.graph.params_type import ParamsType +from aesara.graph.type import CType +from aesara.link.c.interface import HideC +from aesara.scalar import bool as bool_t +from aesara.scalar import int32 as int_t +from aesara.scalar import uint32 as size_t +from aesara.tensor.basic import AllocDiag +from aesara.tensor.math import clip, minimum +from aesara.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedSubtensor, AdvancedSubtensor1, @@ -22,7 +22,7 @@ Subtensor, get_idx_list, ) -from theano.tensor.type import integer_dtypes +from aesara.tensor.type import integer_dtypes try: @@ -31,7 +31,7 @@ except ImportError: pass -from theano.gpuarray.basic_ops import ( +from aesara.gpuarray.basic_ops import ( GpuKernelBase, Kernel, as_gpuarray_variable, @@ -39,7 +39,7 @@ gpuarray_helper_inc_dir, infer_context_name, ) -from theano.gpuarray.type import GpuArrayType, gpu_context_type +from aesara.gpuarray.type import GpuArrayType, gpu_context_type iadd_reg = {} @@ -1014,7 +1014,7 @@ def c_code(self, node, name, inputs, outputs, sub): step[0] = 0; num_indices = PyArray_SIZE(%(ind)s); if (!%(params)s->inplace) { - %(out)s = theano_try_copy(%(out)s, %(x)s); + %(out)s = aesara_try_copy(%(out)s, %(x)s); if (%(out)s == NULL) { // Exception already set %(fail)s @@ -1155,7 +1155,7 @@ def c_code(self, node, name, inputs, outputs, sub): %(out)s = %(x)s; Py_INCREF(%(out)s); } else { - %(out)s = theano_try_copy(%(out)s, %(x)s); + %(out)s = aesara_try_copy(%(out)s, %(x)s); } if (!%(out)s) { // Exception already set diff --git a/theano/gpuarray/type.py b/aesara/gpuarray/type.py similarity index 96% rename from theano/gpuarray/type.py rename to aesara/gpuarray/type.py index 2f0ea4c6c9..98d9929ba4 100644 --- a/theano/gpuarray/type.py +++ b/aesara/gpuarray/type.py @@ -5,32 +5,32 @@ import numpy as np -import theano -import theano.scalar as ts -import theano.tensor as tt -import theano.tensor.basic -from theano.compile import SharedVariable -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable -from theano.graph.type import CType -from theano.misc.safe_asarray import _asarray -from theano.tensor.shape import ( +import aesara +import aesara.scalar as ts +import aesara.tensor as tt +import aesara.tensor.basic +from aesara.compile import SharedVariable +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable +from aesara.graph.type import CType +from aesara.misc.safe_asarray import _asarray +from aesara.tensor.shape import ( register_shape_c_code, register_shape_i_c_code, register_specify_shape_c_code, ) -from theano.tensor.type import TensorType, complex_dtypes, discrete_dtypes -from theano.tensor.type import values_eq_approx as tensor_values_eq_approx -from theano.tensor.type import ( +from 
aesara.tensor.type import TensorType, complex_dtypes, discrete_dtypes +from aesara.tensor.type import values_eq_approx as tensor_values_eq_approx +from aesara.tensor.type import ( values_eq_approx_remove_inf as tensor_values_eq_approx_remove_inf, ) -from theano.tensor.type import ( +from aesara.tensor.type import ( values_eq_approx_remove_inf_nan as tensor_values_eq_approx_remove_inf_nan, ) -from theano.tensor.type import ( +from aesara.tensor.type import ( values_eq_approx_remove_nan as tensor_values_eq_approx_remove_nan, ) -from theano.tensor.var import TensorConstantSignature, _tensor_py_operators +from aesara.tensor.var import TensorConstantSignature, _tensor_py_operators # Make sure this is importable even if pygpu is absent @@ -193,7 +193,7 @@ class GpuArrayType(CType): See Also -------- - theano.graph.type.Type + aesara.graph.type.Type """ @@ -462,8 +462,8 @@ def dtype_specs(self): "int32": (int, "npy_int32", "NPY_INT32"), "uint64": (int, "npy_uint64", "NPY_UINT64"), "int64": (int, "npy_int64", "NPY_INT64"), - # 'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'), - # 'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64') + # 'complex128': (complex, 'aesara_complex128', 'NPY_COMPLEX128'), + # 'complex64': (complex, 'aesara_complex64', 'NPY_COMPLEX64') }[self.dtype] except KeyError: raise TypeError( @@ -587,7 +587,7 @@ def values_eq_approx( return GpuArrayType.values_eq(a, b) else: if not (allow_remove_inf or allow_remove_nan): - atol_, rtol_ = theano.tensor.math._get_atol_rtol(a, b) + atol_, rtol_ = aesara.tensor.math._get_atol_rtol(a, b) if rtol is not None: rtol_ = rtol if atol is not None: @@ -672,7 +672,7 @@ class GpuArrayVariable(_operators, Variable): # override the default def __repr_test_value__(self): - return repr(np.array(theano.graph.op.get_test_value(self))) + return repr(np.array(aesara.graph.op.get_test_value(self))) GpuArrayType.Variable = GpuArrayVariable @@ -765,7 +765,7 @@ def gpuarray_shared_constructor( """ SharedVariable constructor for GpuArrayType. - See :func:`theano.shared`. + See :func:`aesara.shared`. :target: default None The device target. 
As None is a valid value and we need to @@ -801,7 +801,7 @@ def gpuarray_shared_constructor( return GpuArraySharedVariable(type=type, value=deviceval, name=name, strict=strict) -theano.compile.register_view_op_c_code( +aesara.compile.register_view_op_c_code( GpuArrayType, """ Py_XDECREF(%(oname)s); @@ -847,7 +847,7 @@ def gpuarray_shared_constructor( version=(1,), ) -theano.compile.register_deep_copy_op_c_code( +aesara.compile.register_deep_copy_op_c_code( GpuArrayType, """ Py_XDECREF(%(oname)s); @@ -857,7 +857,7 @@ def gpuarray_shared_constructor( version=(5,), ) -theano.tensor.basic.register_rebroadcast_c_code( +aesara.tensor.basic.register_rebroadcast_c_code( GpuArrayType, """ if(%(iname)s->ga.dimensions[%(axis)s] != 1){ diff --git a/theano/gradient.py b/aesara/gradient.py similarity index 91% rename from theano/gradient.py rename to aesara/gradient.py index 3c7770b10e..f7cbd005ab 100644 --- a/theano/gradient.py +++ b/aesara/gradient.py @@ -8,31 +8,25 @@ import numpy as np -import theano -from theano.compile.ops import ViewOp -from theano.configdefaults import config -from theano.graph import utils -from theano.graph.basic import Variable -from theano.graph.null_type import NullType, null_type -from theano.graph.op import get_test_values -from theano.graph.type import Type - - -__authors__ = ( - "James Bergstra, Razvan Pascanu, Arnaud Bergeron, Ian Goodfellow, PyMC Developers" -) -__copyright__ = "(c) 2011, Universite de Montreal" -__license__ = "3-clause BSD License" +import aesara +from aesara.compile.ops import ViewOp +from aesara.configdefaults import config +from aesara.graph import utils +from aesara.graph.basic import Variable +from aesara.graph.null_type import NullType, null_type +from aesara.graph.op import get_test_values +from aesara.graph.type import Type + __docformat__ = "restructuredtext en" -_logger = logging.getLogger("theano.gradient") - -# we can't do "import theano.tensor" -# tensor depends on theano.compile -# theano.compile depends on theano.gradient (this file) -# the reason theano.compile depends on theano.gradient -# is that theano.compile.builders contains the op from graph -# functionality and it uses theano.gradient to implement +_logger = logging.getLogger("aesara.gradient") + +# we can't do "import aesara.tensor" +# tensor depends on aesara.compile +# aesara.compile depends on aesara.gradient (this file) +# the reason aesara.compile depends on aesara.gradient +# is that aesara.compile.builders contains the op from graph +# functionality and it uses aesara.gradient to implement # the new op's grad method tensor = None @@ -51,14 +45,16 @@ def format_as(use_list, use_tuple, outputs): is not a list or a tuple then it is converted into a one element tuple). Otherwise (if both flags are false), `outputs` is returned. """ - assert not (use_list and use_tuple), "Both flags cannot be simultaneously True" + if use_list and use_tuple: + raise ValueError("Both flags cannot be simultaneously True") if (use_list or use_tuple) and not isinstance(outputs, (list, tuple)): if use_list: return [outputs] else: return (outputs,) elif not (use_list or use_tuple) and isinstance(outputs, (list, tuple)): - assert len(outputs) == 1, "Wrong arguments. Expected a one element list" + if len(outputs) != 1: + raise ValueError("Wrong arguments; expected a one element list") return outputs[0] elif use_list or use_tuple: if use_list: @@ -76,7 +72,7 @@ def grad_not_implemented(op, x_pos, x, comment=""): un-computable variable, an exception (e.g. 
`NotImplementedError`) will be raised indicating that the gradient on the `x_pos`'th input of `op` has not been implemented. Likewise if - any call to theano.function involves this variable. + any call to aesara.function involves this variable. Optionally adds a comment to the exception explaining why this gradient is not implemented. @@ -99,7 +95,7 @@ def grad_undefined(op, x_pos, x, comment=""): un-computable variable, an exception (e.g. `GradUndefinedError`) will be raised indicating that the gradient on the `x_pos`'th input of `op` is mathematically undefined. Likewise if - any call to theano.function involves this variable. + any call to aesara.function involves this variable. Optionally adds a comment to the exception explaining why this gradient is not defined. @@ -168,13 +164,13 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", return_disconnected=" Parameters ---------- - f : :class:`~theano.graph.basic.Variable` or list of Variables + f : :class:`~aesara.graph.basic.Variable` or list of Variables `f` stands for the output of the computational graph to which you want to apply the R operator - wrt : :class:`~theano.graph.basic.Variable` or list of Variables + wrt : :class:`~aesara.graph.basic.Variable` or list of Variables variables for which you compute the R operator of the expression described by `f` - eval_points : :class:`~theano.graph.basic.Variable` or list of Variables + eval_points : :class:`~aesara.graph.basic.Variable` or list of Variables evalutation points for each of the variables in `wrt` disconnected_outputs : str Defines the behaviour if some of the variables in `f` @@ -194,7 +190,7 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", return_disconnected=" Returns ------- - :class:`~theano.graph.basic.Variable` or list/tuple of Variables depending on type of f + :class:`~aesara.graph.basic.Variable` or list/tuple of Variables depending on type of f Symbolic expression such that R_op[i] = sum_j (d f[i] / d wrt[j]) eval_point[j] where the indices in that expression are magic multidimensional @@ -214,7 +210,8 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", return_disconnected=" if not isinstance(f, (list, tuple)): f = [f] - assert len(wrt) == len(eval_points) + if len(wrt) != len(eval_points): + raise ValueError("`wrt` must be the same length as `eval_points`.") # Check that each element of wrt corresponds to an element # of eval_points with the same dimensionality. 
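Since the `Rop`/`Lop` docstrings above describe the R- and L-operators only symbolically, here is a minimal sketch of calling them through the renamed `aesara.gradient` API; the variable names and the choice of `tanh` are placeholders of ours, not part of the patch.

```python
# Minimal sketch: Jacobian-vector product (Rop) and vector-Jacobian product (Lop).
import aesara
import aesara.tensor as tt
from aesara.gradient import Lop, Rop

x = tt.vector("x")
v = tt.vector("v")   # evaluation point
y = tt.tanh(x)       # f(x), elementwise

jv = Rop(y, x, v)    # sum_j (d y[i] / d x[j]) * v[j]
vj = Lop(y, x, v)    # sum_i v[i] * (d y[i] / d x[j])

f = aesara.function([x, v], [jv, vj])
print(f([0.0, 1.0], [1.0, 1.0]))  # both equal (1 - tanh(x)**2) * v here
```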
@@ -222,9 +219,9 @@ def Rop(f, wrt, eval_points, disconnected_outputs="raise", return_disconnected=" i = pack[0] wrt_elem, eval_point = pack[1] if not isinstance(wrt_elem, Variable): - wrt_elem = theano.tensor.as_tensor_variable(wrt_elem) + wrt_elem = aesara.tensor.as_tensor_variable(wrt_elem) if not isinstance(eval_point, Variable): - eval_point = theano.tensor.as_tensor_variable(eval_point) + eval_point = aesara.tensor.as_tensor_variable(eval_point) try: @@ -284,9 +281,9 @@ def _traverse(node): for x, y in zip(inputs, local_eval_points): if y is not None: if not isinstance(x, Variable): - x = theano.tensor.as_tensor_variable(x) + x = aesara.tensor.as_tensor_variable(x) if not isinstance(y, Variable): - y = theano.tensor.as_tensor_variable(y) + y = aesara.tensor.as_tensor_variable(y) try: y = x.type.filter_variable(y) except TypeError: @@ -302,7 +299,7 @@ def _traverse(node): # we have to make it be wrong for Rop to keep working # Rop should eventually be upgraded to handle integers # correctly, the same as grad - y = theano.tensor.cast(y, x.type.dtype) + y = aesara.tensor.cast(y, x.type.dtype) y = x.type.filter_variable(y) assert x.type == y.type same_type_eval_points.append(y) @@ -345,7 +342,7 @@ def _traverse(node): "'ignore', 'warn' and 'raise'." ) if return_disconnected.lower() == "zero": - rval.append(theano.tensor.zeros_like(out)) + rval.append(aesara.tensor.zeros_like(out)) elif return_disconnected.lower() == "none": rval.append(None) elif return_disconnected.lower() == "disconnected": @@ -370,18 +367,18 @@ def Lop(f, wrt, eval_points, consider_constant=None, disconnected_inputs="raise" Parameters ---------- - f : :class:`~theano.graph.basic.Variable` or list of Variables + f : :class:`~aesara.graph.basic.Variable` or list of Variables `f` stands for the output of the computational graph to which you want to apply the L operator - wrt : :class:`~theano.graph.basic.Variable` or list of Variables + wrt : :class:`~aesara.graph.basic.Variable` or list of Variables variables for which you compute the L operator of the expression described by `f` - eval_points : :class:`~theano.graph.basic.Variable` or list of Variables + eval_points : :class:`~aesara.graph.basic.Variable` or list of Variables evalutation points for each of the variables in `f` Returns ------- - :class:`~theano.graph.basic.Variable` or list/tuple of Variables depending on type of `f` + :class:`~aesara.graph.basic.Variable` or list/tuple of Variables depending on type of `f` Symbolic expression such that ``L_op[i] = sum_i (d f[i] / d wrt[j]) eval_point[i]`` where the indices in that expression are magic multidimensional @@ -437,16 +434,16 @@ def grad( """ Return symbolic gradients of one cost with respect to one or more variables. - For more information about how automatic differentiation works in Theano, + For more information about how automatic differentiation works in Aesara, see :mod:`gradient`. For information on how to implement the gradient of a certain Op, see :func:`grad`. Parameters ---------- - cost : :class:`~theano.graph.basic.Variable` scalar (0-dimensional) tensor variable or ``None`` + cost : :class:`~aesara.graph.basic.Variable` scalar (0-dimensional) tensor variable or ``None`` Value that we are differentiating (that we want the gradient of). May be `None` if `known_grads` is provided. 
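A quick illustration of the `Rop` semantics documented above: the following minimal sketch computes a Jacobian-vector product, assuming the renamed `aesara.gradient.Rop` keeps the Theano-era behaviour shown in this hunk (the variable names are purely illustrative).

.. code-block:: python

    # Hedged sketch: Jacobian-vector product via the R-operator.
    import numpy as np
    import aesara
    import aesara.tensor as tt
    from aesara.gradient import Rop

    W = tt.dmatrix("W")
    x = tt.dvector("x")
    v = tt.dvector("v")                  # evaluation point (direction) for x

    y = tt.dot(W, x)                     # f(x) = W x
    Jv = Rop(y, wrt=x, eval_points=v)    # sum_j (d y[i] / d x[j]) v[j] = W v

    f = aesara.function([W, x, v], Jv)
    print(f(np.eye(3), np.ones(3), np.arange(3.0)))   # ~ [0. 1. 2.]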
- wrt : :class:`~theano.graph.basic.Variable` or list of Variables + wrt : :class:`~aesara.graph.basic.Variable` or list of Variables Term[s] with respect to which we want gradients consider_constant : list of variables Expressions not to backpropagate through @@ -492,7 +489,7 @@ def grad( if cost is None: if known_grads is None: - raise AssertionError("cost and known_grads can't both be None.") + raise ValueError("cost and known_grads can't both be None.") if cost is not None and isinstance(cost.type, NullType): raise ValueError( @@ -549,7 +546,7 @@ def grad( # So before we try to cast it make sure it even has a dtype if ( hasattr(g_cost.type, "dtype") - and cost.type.dtype in theano.tensor.type.continuous_dtypes + and cost.type.dtype in aesara.tensor.type.continuous_dtypes ): # Here we enforce the constraint that floating point variables # have the same dtype as their gradient. @@ -558,7 +555,7 @@ def grad( # This is to be enforced by the Op.grad method for the # Op that outputs cost. if hasattr(g_cost.type, "dtype"): - assert g_cost.type.dtype in theano.tensor.type.continuous_dtypes + assert g_cost.type.dtype in aesara.tensor.type.continuous_dtypes grad_dict[cost] = g_cost @@ -567,9 +564,9 @@ def grad( if not hasattr(g_var, "type"): raise TypeError( - "output grads must be theano variables." + "output grads must be aesara variables." f"Ambiguous whether {type(g_var)} should be made into tensor" - " or sparse theano variable" + " or sparse aesara variable" ) if not isinstance( @@ -628,7 +625,7 @@ def handle_disconnected(var): for var in grad_dict: g = grad_dict[var] if hasattr(g.type, "dtype"): - assert g.type.dtype in theano.tensor.type.float_dtypes + assert g.type.dtype in aesara.tensor.type.float_dtypes rval = _populate_grad_dict(var_to_app_to_idx, grad_dict, wrt, cost_name) @@ -664,12 +661,12 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): With respect to `wrt`, computes gradients of cost and/or from existing `start` gradients, up to the `end` variables of a symbolic digraph. In other words, computes gradients for a - subgraph of the symbolic theano function. Ignores all disconnected + subgraph of the symbolic aesara function. Ignores all disconnected inputs. This can be useful when one needs to perform the gradient descent iteratively (e.g. one layer at a time in an MLP), or when a - particular operation is not differentiable in theano + particular operation is not differentiable in aesara (e.g. stochastic sampling from a multinomial). In the latter case, the gradient of the non-differentiable process could be approximated by user-defined formula, which could be calculated @@ -686,14 +683,14 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): .. 
code-block:: python - x, t = theano.tensor.fvector('x'), theano.tensor.fvector('t') - w1 = theano.shared(np.random.randn(3,4)) - w2 = theano.shared(np.random.randn(4,2)) - a1 = theano.tensor.tanh(theano.tensor.dot(x,w1)) - a2 = theano.tensor.tanh(theano.tensor.dot(a1,w2)) - cost2 = theano.tensor.sqr(a2 - t).sum() - cost2 += theano.tensor.sqr(w2.sum()) - cost1 = theano.tensor.sqr(w1.sum()) + x, t = aesara.tensor.fvector('x'), aesara.tensor.fvector('t') + w1 = aesara.shared(np.random.randn(3,4)) + w2 = aesara.shared(np.random.randn(4,2)) + a1 = aesara.tensor.tanh(aesara.tensor.dot(x,w1)) + a2 = aesara.tensor.tanh(aesara.tensor.dot(a1,w2)) + cost2 = aesara.tensor.sqr(a2 - t).sum() + cost2 += aesara.tensor.sqr(w2.sum()) + cost1 = aesara.tensor.sqr(w1.sum()) params = [[w2],[w1]] costs = [cost2,cost1] @@ -702,7 +699,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): next_grad = None param_grads = [] for i in range(2): - param_grad, next_grad = theano.subgraph_grad( + param_grad, next_grad = aesara.subgraph_grad( wrt=params[i], end=grad_ends[i], start=next_grad, cost=costs[i] ) @@ -716,8 +713,8 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): Gradients are computed with respect to `wrt`. end : list of variables - Theano variables at which to end gradient descent (they are - considered constant in theano.grad). For convenience, the + Aesara variables at which to end gradient descent (they are + considered constant in aesara.grad). For convenience, the gradients with respect to these variables are also returned. start : dictionary of variables @@ -725,9 +722,9 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): gradients. This is useful when the gradient on some variables are known. These are used to compute the gradients backwards up to the variables in `end` (they are used as known_grad in - theano.grad). + aesara.grad). - cost : :class:`~theano.graph.basic.Variable` scalar (0-dimensional) variable + cost : :class:`~aesara.graph.basic.Variable` scalar (0-dimensional) variable Additional costs for which to compute the gradients. For example, these could be weight decay, an l1 constraint, MSE, NLL, etc. May optionally be None if start is provided. @@ -752,11 +749,18 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): .. 
versionadded:: 0.7 """ - assert (cost is not None) or (start is not None) - assert isinstance(end, list) - assert isinstance(wrt, list) + if cost is None and start is None: + raise ValueError("`cost` or `start` must be specified.") + + if not isinstance(end, list): + raise TypeError("`end` must be a list.") + + if not isinstance(wrt, list): + raise TypeError("`wrt` must be a list.") + if start is not None: - assert isinstance(start, dict) + if not isinstance(start, dict): + raise TypeError("`start` must be a dictionary.") params = list(set(wrt + end)) @@ -764,7 +768,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): cost_grads = None if start is not None: start_grads = list( - theano.grad( + aesara.grad( cost=None, wrt=params, known_grads=start, @@ -775,7 +779,7 @@ def subgraph_grad(wrt, end, start=None, cost=None, details=False): if cost is not None: cost_grads = list( - theano.grad( + aesara.grad( cost=cost, wrt=params, consider_constant=end, @@ -887,9 +891,9 @@ def _populate_var_to_app_to_idx(outputs, wrt, consider_constant): consider_constant = [] else: # error checking on consider_constant: verify that it is a collection - # of theano variables + # of aesara variables # this is important, if someone accidentally passes a nested data - # structure with theano variables at the leaves, only the root will + # structure with aesara variables at the leaves, only the root will # be properly considered constant try: iter(consider_constant) @@ -1078,7 +1082,7 @@ def access_term_cache(node): # List of bools indicating if each output is an integer dtype output_is_int = [ hasattr(output.type, "dtype") - and output.type.dtype in theano.tensor.type.discrete_dtypes + and output.type.dtype in aesara.tensor.type.discrete_dtypes for output in node.outputs ] @@ -1158,7 +1162,7 @@ def try_to_copy_if_needed(var): o_dt = getattr(o.type, "dtype", None) og_dt = getattr(og.type, "dtype", None) if ( - o_dt not in theano.tensor.type.discrete_dtypes + o_dt not in aesara.tensor.type.discrete_dtypes and og_dt and o_dt != og_dt ): @@ -1173,7 +1177,7 @@ def try_to_copy_if_needed(var): ng_dt = getattr(ng.type, "dtype", None) if ( ng_dt is not None - and o_dt not in theano.tensor.type.discrete_dtypes + and o_dt not in aesara.tensor.type.discrete_dtypes ): assert ng_dt == o_dt @@ -1187,7 +1191,7 @@ def try_to_copy_if_needed(var): for ng in new_output_grads: assert ( getattr(ng.type, "dtype", None) - not in theano.tensor.type.discrete_dtypes + not in aesara.tensor.type.discrete_dtypes ) # If config.compute_test_value is turned on, check that the @@ -1304,7 +1308,7 @@ def try_to_copy_if_needed(var): ) if not isinstance(term.type, (NullType, DisconnectedType)): - if term.type.dtype not in theano.tensor.type.float_dtypes: + if term.type.dtype not in aesara.tensor.type.float_dtypes: raise TypeError( str(node.op) + ".grad illegally " " returned an integer-valued variable." @@ -1329,7 +1333,7 @@ def try_to_copy_if_needed(var): "integer-valued outputs so it should be " "NullType, DisconnectedType, or some form " "of zeros. It is not NullType or " - "DisconnectedType and theano can't " + "DisconnectedType and aesara can't " "simplify it to a constant, so it's not " "verifiably zeros." ) @@ -1339,7 +1343,7 @@ def try_to_copy_if_needed(var): f" {i}. Since this input is only connected " "to integer-valued outputs, it should " "evaluate to zeros, but it evaluates to" - f"{theano.get_scalar_constant_value(term)}." + f"{aesara.get_scalar_constant_value(term)}." 
) raise ValueError(msg) @@ -1456,7 +1460,7 @@ def _float_ones_like(x): floating point dtype""" dtype = x.type.dtype - if dtype not in theano.tensor.type.float_dtypes: + if dtype not in aesara.tensor.type.float_dtypes: dtype = config.floatX return x.ones_like(dtype=dtype) @@ -1679,8 +1683,8 @@ def max_err(self, g_pt, abs_tol, rel_tol): def mode_not_slow(mode): - from theano.compile.debugmode import DebugMode - from theano.compile.mode import FAST_RUN, get_mode + from aesara.compile.debugmode import DebugMode + from aesara.compile.mode import FAST_RUN, get_mode if mode == "FAST_COMPILE": return FAST_RUN @@ -1714,14 +1718,14 @@ def verify_grad( Examples -------- - >>> verify_grad(theano.tensor.tanh, + >>> verify_grad(aesara.tensor.tanh, ... (np.asarray([[2, 3, 4], [-1, 3.3, 9.9]]),), ... rng=np.random) Parameters ---------- fun : a Python function - `fun` takes Theano variables as inputs, and returns a Theano variable. + `fun` takes Aesara variables as inputs, and returns an Aesara variable. For instance, an Op instance with a single output. pt : list of numpy.ndarrays Input values, points where the gradient is estimated. @@ -1759,10 +1763,12 @@ def verify_grad( that case as well by using random projections. """ - from theano.compile.function import function - from theano.compile.sharedvalue import shared + from aesara.compile.function import function + from aesara.compile.sharedvalue import shared + + if not isinstance(pt, (list, tuple)): + raise TypeError("`pt` should be a list or tuple") - assert isinstance(pt, (list, tuple)) pt = [np.array(p) for p in pt] for i, p in enumerate(pt): @@ -1789,7 +1795,7 @@ def verify_grad( "numpy.random.RandomState. You may " "want to use tests.unittest" "_tools.verify_grad instead of " - "theano.gradient.verify_grad." + "aesara.gradient.verify_grad." ) # We allow input downcast in `function`, because `numeric_grad` works in @@ -1804,9 +1810,9 @@ def verify_grad( ) tensor_pt = [ - theano.tensor.type.TensorType( - theano.tensor.as_tensor_variable(p).dtype, - theano.tensor.as_tensor_variable(p).broadcastable, + aesara.tensor.type.TensorType( + aesara.tensor.as_tensor_variable(p).dtype, + aesara.tensor.as_tensor_variable(p).broadcastable, )(name=f"input {i}") for i, p in enumerate(pt) ] @@ -1845,7 +1851,7 @@ def random_projection(): # random projection of o onto t_r # This sum() is defined above, it's not the builtin sum. - cost = theano.tensor.sum(t_r * o_output) + cost = aesara.tensor.sum(t_r * o_output) if no_debug_ref: mode_for_cost = mode_not_slow(mode) @@ -1929,9 +1935,9 @@ def jacobian(expression, wrt, consider_constant=None, disconnected_inputs="raise Parameters ---------- - expression : Vector (1-dimensional) :class:`~theano.graph.basic.Variable` + expression : Vector (1-dimensional) :class:`~aesara.graph.basic.Variable` Values that we are differentiating (that we want the Jacobian of) - wrt : :class:`~theano.graph.basic.Variable` or list of Variables + wrt : :class:`~aesara.graph.basic.Variable` or list of Variables Term[s] with respect to which we compute the Jacobian consider_constant : list of variables Expressions not to backpropagate through @@ -1947,21 +1953,21 @@ def jacobian(expression, wrt, consider_constant=None, disconnected_inputs="raise Returns ------- - :class:`~theano.graph.basic.Variable` or list/tuple of Variables (depending upon `wrt`) + :class:`~aesara.graph.basic.Variable` or list/tuple of Variables (depending upon `wrt`) The Jacobian of `expression` with respect to (elements of) `wrt`. 
If an element of `wrt` is not differentiable with respect to the output, then a zero variable is returned. The return value is of same type as `wrt`: a list/tuple or TensorVariable in all cases. """ - # Check inputs have the right format - assert isinstance( - expression, Variable - ), "theano.tensor.jacobian expects a Variable as `expression`" - assert expression.ndim < 2, ( - "theano.tensor.jacobian expects a 1 dimensional variable as " - "`expression`. If not use flatten to make it a vector" - ) + if not isinstance(expression, Variable): + raise TypeError("jacobian expects a Variable as `expression`") + + if expression.ndim > 1: + raise ValueError( + "jacobian expects a 1 dimensional variable as `expression`." + " If not use flatten to make it a vector" + ) using_list = isinstance(wrt, list) using_tuple = isinstance(wrt, tuple) @@ -2002,16 +2008,12 @@ def inner_function(*args): # generator used n expression (because during computing gradients we are # just backtracking over old values. (rp Jan 2012 - if anyone has a # counter example please show me) - jacobs, updates = theano.scan( + jacobs, updates = aesara.scan( inner_function, - sequences=theano.tensor.arange(expression.shape[0]), + sequences=aesara.tensor.arange(expression.shape[0]), non_sequences=[expression] + wrt, ) - assert not updates, ( - "Scan has returned a list of updates. This should not " - "happen! Report this to theano-users (also include the " - "script that generated the error)" - ) + assert not updates, "Scan has returned a list of updates; this should not happen." return format_as(using_list, using_tuple, jacobs) @@ -2035,7 +2037,7 @@ def hessian(cost, wrt, consider_constant=None, disconnected_inputs="raise"): Returns ------- - :class:`~theano.graph.basic.Variable` or list/tuple of Variables + :class:`~aesara.graph.basic.Variable` or list/tuple of Variables The Hessian of the `cost` with respect to (elements of) `wrt`. If an element of `wrt` is not differentiable with respect to the output, then a zero variable is returned. The return value is @@ -2043,12 +2045,11 @@ def hessian(cost, wrt, consider_constant=None, disconnected_inputs="raise"): """ # Check inputs have the right format - assert isinstance( - cost, Variable - ), "theano.tensor.hessian expects a Variable as `cost`" - assert ( - cost.ndim == 0 - ), "theano.tensor.hessian expects a 0 dimensional variable as `cost`" + if not isinstance(cost, Variable): + raise TypeError("hessian expects a Variable as `cost`") + + if cost.ndim != 0: + raise ValueError("hessian expects a 0 dimensional variable as `cost`") using_list = isinstance(wrt, list) using_tuple = isinstance(wrt, tuple) @@ -2060,13 +2061,15 @@ def hessian(cost, wrt, consider_constant=None, disconnected_inputs="raise"): hessians = [] for input in wrt: - assert isinstance( - input, Variable - ), "theano.tensor.hessian expects a (list of) Variable as `wrt`" - assert input.ndim == 1, ( - "theano.tensor.hessian expects a (list of) 1 dimensional variable " - "as `wrt`" - ) + + if not isinstance(input, Variable): + raise TypeError("hessian expects a (list of) Variable as `wrt`") + + if input.ndim != 1: + raise ValueError( + "hessian expects a (list of) 1 dimensional variable as `wrt`" + ) + expr = grad( cost, input, @@ -2077,21 +2080,19 @@ def hessian(cost, wrt, consider_constant=None, disconnected_inputs="raise"): # It is possible that the inputs are disconnected from expr, # even if they are connected to cost. # This should not be an error. 
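For the `jacobian` helper described above, a minimal usage sketch, assuming the renamed `aesara.gradient.jacobian` behaves like its Theano counterpart (names below are illustrative):

.. code-block:: python

    # Hedged sketch: full Jacobian of an element-wise expression.
    import numpy as np
    import aesara
    import aesara.tensor as tt
    from aesara.gradient import jacobian

    x = tt.dvector("x")
    y = tt.tanh(x)                 # 1-dimensional expression, as required
    J = jacobian(y, wrt=x)         # J[i, j] = d y[i] / d x[j], built with scan

    f = aesara.function([x], J)
    print(f(np.zeros(3)))          # identity matrix, since tanh'(0) = 1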
- hess, updates = theano.scan( + hess, updates = aesara.scan( lambda i, y, x: grad( y[i], x, consider_constant=consider_constant, disconnected_inputs="ignore", ), - sequences=theano.tensor.arange(expr.shape[0]), + sequences=aesara.tensor.arange(expr.shape[0]), non_sequences=[expr, input], ) - assert not updates, ( - "Scan has returned a list of updates. This should not " - "happen! Report this to theano-users (also include the " - "script that generated the error)" - ) + assert ( + not updates + ), "Scan has returned a list of updates; this should not happen." hessians.append(hess) return format_as(using_list, using_tuple, hessians) @@ -2112,9 +2113,9 @@ def _is_zero(x): no_constant_value = True try: - constant_value = theano.get_scalar_constant_value(x) + constant_value = aesara.get_scalar_constant_value(x) no_constant_value = False - except theano.tensor.exceptions.NotScalarConstantError: + except aesara.tensor.exceptions.NotScalarConstantError: pass if no_constant_value: @@ -2147,7 +2148,7 @@ def consider_constant(x): through. In other words, the gradient of the expression is truncated to 0. - :param x: A Theano expression whose gradient should be truncated. + :param x: A Aesara expression whose gradient should be truncated. :return: The expression is returned unmodified, but its gradient is now truncated to 0. @@ -2174,7 +2175,7 @@ def R_op(self, inputs, eval_points): if eval_points[0] is None: return [None] - return theano.tensor.zeros(1) + return aesara.tensor.zeros(1) zero_grad_ = ZeroGrad() @@ -2192,12 +2193,12 @@ def zero_grad(x): Parameters ---------- - x: :class:`~theano.graph.basic.Variable` - A Theano expression whose gradient should be truncated. + x: :class:`~aesara.graph.basic.Variable` + A Aesara expression whose gradient should be truncated. Returns ------- - :class:`~theano.graph.basic.Variable` + :class:`~aesara.graph.basic.Variable` An expression equivalent to ``x``, with its gradient truncated to 0. """ @@ -2231,12 +2232,12 @@ def undefined_grad(x): Parameters ---------- - x: :class:`~theano.graph.basic.Variable` - A Theano expression whose gradient should be undefined. + x: :class:`~aesara.graph.basic.Variable` + A Aesara expression whose gradient should be undefined. Returns ------- - :class:`~theano.graph.basic.Variable` + :class:`~aesara.graph.basic.Variable` An expression equivalent to ``x``, with its gradient undefined. """ return undefined_grad_(x) @@ -2272,13 +2273,13 @@ def disconnected_grad(x): Parameters ---------- - x: :class:`~theano.graph.basic.Variable` - A Theano expression whose gradient should not be + x: :class:`~aesara.graph.basic.Variable` + A Aesara expression whose gradient should not be backpropagated through. Returns ------- - :class:`~theano.graph.basic.Variable` + :class:`~aesara.graph.basic.Variable` An expression equivalent to ``x``, with its gradient now effectively truncated to 0. """ @@ -2294,11 +2295,13 @@ def __init__(self, clip_lower_bound, clip_upper_bound): # as they do not influence the perform of this op. 
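The gradient-blocking helpers renamed above (`zero_grad` and `disconnected_grad`) differ mainly in how the blocked input is reported; a small hedged sketch of the observable effect:

.. code-block:: python

    # Hedged sketch: both helpers stop backpropagation through `x`,
    # so d(cost)/dx evaluates to 0 rather than 2 * x.
    import aesara
    import aesara.tensor as tt
    from aesara.gradient import disconnected_grad, grad, zero_grad

    x = tt.dscalar("x")
    g_zero = grad(zero_grad(x) ** 2, x)
    # `disconnected_grad` marks `x` as disconnected, hence the flag below.
    g_disc = grad(disconnected_grad(x) ** 2, x, disconnected_inputs="ignore")

    f = aesara.function([x], [g_zero, g_disc])
    print(f(3.0))   # -> [array(0.0), array(0.0)]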
self.clip_lower_bound = clip_lower_bound self.clip_upper_bound = clip_upper_bound - assert self.clip_upper_bound >= self.clip_lower_bound + + if not self.clip_upper_bound >= self.clip_lower_bound: + raise ValueError("`clip_upper_bound` should be >= `clip_lower_bound`") def grad(self, args, g_outs): return [ - theano.tensor.clip(g_out, self.clip_lower_bound, self.clip_upper_bound) + aesara.tensor.clip(g_out, self.clip_lower_bound, self.clip_upper_bound) for g_out in g_outs ] @@ -2320,10 +2323,10 @@ def grad_clip(x, lower_bound, upper_bound): Examples -------- - >>> x = theano.tensor.type.scalar() - >>> z = theano.gradient.grad(grad_clip(x, -1, 1)**2, x) - >>> z2 = theano.gradient.grad(x**2, x) - >>> f = theano.function([x], outputs = [z, z2]) + >>> x = aesara.tensor.type.scalar() + >>> z = aesara.gradient.grad(grad_clip(x, -1, 1)**2, x) + >>> z2 = aesara.gradient.grad(x**2, x) + >>> f = aesara.function([x], outputs = [z, z2]) >>> print(f(2.0)) [array(1.0), array(4.0)] @@ -2357,15 +2360,15 @@ def grad_scale(x, multiplier): Examples -------- - >>> x = theano.tensor.fscalar() - >>> fx = theano.tensor.sin(x) - >>> fp = theano.grad(fx, wrt=x) - >>> fprime = theano.function([x], fp) + >>> x = aesara.tensor.fscalar() + >>> fx = aesara.tensor.sin(x) + >>> fp = aesara.grad(fx, wrt=x) + >>> fprime = aesara.function([x], fp) >>> print(fprime(2)) # doctest: +ELLIPSIS -0.416... >>> f_inverse=grad_scale(fx, -1.) - >>> fpp = theano.grad(f_inverse, wrt=x) - >>> fpprime = theano.function([x], fpp) + >>> fpp = aesara.grad(f_inverse, wrt=x) + >>> fpprime = aesara.function([x], fpp) >>> print(fpprime(2)) # doctest: +ELLIPSIS 0.416... """ diff --git a/theano/graph/__init__.py b/aesara/graph/__init__.py similarity index 100% rename from theano/graph/__init__.py rename to aesara/graph/__init__.py diff --git a/theano/graph/basic.py b/aesara/graph/basic.py similarity index 97% rename from theano/graph/basic.py rename to aesara/graph/basic.py index 11720ca7cd..9f50a4cf8d 100644 --- a/theano/graph/basic.py +++ b/aesara/graph/basic.py @@ -23,8 +23,8 @@ import numpy as np -from theano.configdefaults import config -from theano.graph.utils import ( +from aesara.configdefaults import config +from aesara.graph.utils import ( MetaObject, MethodNotDefined, Scratchpad, @@ -33,7 +33,7 @@ add_tag_trace, get_variable_trace_string, ) -from theano.misc.ordered_set import OrderedSet +from aesara.misc.ordered_set import OrderedSet T = TypeVar("T") @@ -42,7 +42,7 @@ class Node(MetaObject): - """A `Node` in a Theano graph. + """A `Node` in an Aesara graph. Currently, graphs contain two kinds of `Nodes`: `Variable`s and `Apply`s. Edges in the graph are not explicitly represented. Instead each `Node` @@ -81,7 +81,7 @@ class Apply(Node): This class is typically instantiated by a `Op.make_node` method, which is called by `Op.__call__`. - The function `theano.compile.function.function` uses `Apply.inputs` + The function `aesara.compile.function.function` uses `Apply.inputs` together with `Variable.owner` to search the expression graph and determine which inputs are necessary to compute the function's outputs. @@ -287,7 +287,7 @@ class Variable(Node): A :term:`Variable` is a node in an expression graph that represents a variable. - The inputs and outputs of every `Apply` (theano.graph.basic.Apply) are `Variable` + The inputs and outputs of every `Apply` (aesara.graph.basic.Apply) are `Variable` instances. The input and output arguments to create a `function` are also `Variable` instances. 
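The `Apply`/`Variable` ownership structure described in the docstrings above can be inspected directly; a short hedged sketch with illustrative names:

.. code-block:: python

    # Hedged sketch: navigating the graph through `owner`.
    import aesara.tensor as tt

    a = tt.dscalar("a")
    b = tt.dscalar("b")
    c = a + b                        # builds an Apply node that owns `c`

    print(type(c.owner))             # <class 'aesara.graph.basic.Apply'>
    print(c.owner.op)                # the add Op (an Elemwise instance)
    print(c.owner.inputs)            # [a, b]
    print(c.owner.outputs[0] is c)   # True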
A `Variable` is like a strongly-typed variable in some other languages; each `Variable` contains a reference to a `Type` @@ -357,21 +357,21 @@ class Variable(Node): .. code-block:: python - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt a = tt.constant(1.5) # declare a symbolic constant b = tt.fscalar() # declare a symbolic floating-point scalar c = a + b # create a simple expression - f = theano.function([b], [c]) # this works because a has a value associated with it already + f = aesara.function([b], [c]) # this works because a has a value associated with it already assert 4.0 == f(2.5) # bind 2.5 to an internal copy of b and evaluate an internal c - theano.function([a], [c]) # compilation error because b (required by c) is undefined + aesara.function([a], [c]) # compilation error because b (required by c) is undefined - theano.function([a,b], [c]) # compilation error because a is constant, it can't be an input + aesara.function([a,b], [c]) # compilation error because a is constant, it can't be an input The python variables :literal:`a,b,c` all refer to instances of type @@ -511,20 +511,20 @@ def eval(self, inputs_to_values=None): Parameters ---------- inputs_to_values - A dictionary mapping theano Variables to values. + A dictionary mapping aesara Variables to values. Examples -------- >>> import numpy as np - >>> import theano.tensor as tt + >>> import aesara.tensor as tt >>> x = tt.dscalar('x') >>> y = tt.dscalar('y') >>> z = x + y >>> np.allclose(z.eval({x : 16.3, y : 12.1}), 28.4) True - We passed :func:`eval` a dictionary mapping symbolic theano + We passed :func:`eval` a dictionary mapping symbolic aesara variables to the values to substitute for them, and it returned the numerical value of the expression. @@ -536,10 +536,10 @@ def eval(self, inputs_to_values=None): the scenes. Subsequent calls to :func:`eval` on that same variable will be fast, because the variable caches the compiled function. - This way of computing has more overhead than a normal Theano + This way of computing has more overhead than a normal Aesara function, so don't use it too much in real scripts. """ - from theano.compile.function import function + from aesara.compile.function import function if inputs_to_values is None: inputs_to_values = {} @@ -982,8 +982,8 @@ def clone_replace( Parameters ---------- - output : Theano Variables (or Theano expressions) - Theano expression that represents the computational graph. + output : Aesara Variables (or Aesara expressions) + Aesara expression that represents the computational graph. replace : dict Dictionary describing which subgraphs should be replaced by what. share_inputs : bool @@ -993,7 +993,7 @@ def clone_replace( value. """ - from theano.compile.function.pfunc import rebuild_collect_shared + from aesara.compile.function.pfunc import rebuild_collect_shared if isinstance(replace, dict): items = list(replace.items()) @@ -1505,7 +1505,7 @@ def observer(node): def equal_computations(xs, ys, in_xs=None, in_ys=None): - """Checks if Theano graphs represent the same computations. + """Checks if Aesara graphs represent the same computations. The two lists `xs`, `ys` should have the same number of entries. 
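`clone_replace`, renamed above, is the usual way to swap a subgraph for another without rebuilding the expression by hand; a minimal hedged sketch, assuming the signature shown in the hunk:

.. code-block:: python

    # Hedged sketch: re-point an expression from `x` to `y`.
    import numpy as np
    import aesara.tensor as tt
    from aesara.graph.basic import clone_replace

    x = tt.dvector("x")
    y = tt.dvector("y")
    cost = tt.sum(tt.sqr(x))                  # depends on x only

    cost_on_y = clone_replace(cost, replace={x: y})
    print(cost_on_y.eval({y: np.ones(4)}))    # -> 4.0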
The function checks if for any corresponding pair `(x,y)` from `zip(xs,ys)` diff --git a/theano/graph/callcache.py b/aesara/graph/callcache.py similarity index 96% rename from theano/graph/callcache.py rename to aesara/graph/callcache.py index db30802f01..0aefc842df 100644 --- a/theano/graph/callcache.py +++ b/aesara/graph/callcache.py @@ -2,7 +2,7 @@ import pickle -_logger = logging.getLogger("theano.graph.callcache") +_logger = logging.getLogger("aesara.graph.callcache") class CallCache: diff --git a/theano/graph/destroyhandler.py b/aesara/graph/destroyhandler.py similarity index 99% rename from theano/graph/destroyhandler.py rename to aesara/graph/destroyhandler.py index 101aaddef3..a43f7f5d09 100644 --- a/theano/graph/destroyhandler.py +++ b/aesara/graph/destroyhandler.py @@ -6,12 +6,12 @@ import itertools from collections import OrderedDict, deque -import theano -from theano.configdefaults import config -from theano.graph.basic import Constant -from theano.graph.fg import InconsistencyError -from theano.graph.toolbox import AlreadyThere, Bookkeeper -from theano.misc.ordered_set import OrderedSet +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Constant +from aesara.graph.fg import InconsistencyError +from aesara.graph.toolbox import AlreadyThere, Bookkeeper +from aesara.misc.ordered_set import OrderedSet class ProtocolError(Exception): @@ -233,7 +233,7 @@ def fast_inplace_check(fgraph, inputs): Inputs Variable that you want to use as inplace destination. """ - Supervisor = theano.compile.function.types.Supervisor + Supervisor = aesara.compile.function.types.Supervisor protected_inputs = [ f.protected for f in fgraph._features if isinstance(f, Supervisor) ] diff --git a/theano/graph/fg.py b/aesara/graph/fg.py similarity index 95% rename from theano/graph/fg.py rename to aesara/graph/fg.py index 3c5b1d1074..a0eddd11a3 100644 --- a/theano/graph/fg.py +++ b/aesara/graph/fg.py @@ -3,15 +3,15 @@ from collections import OrderedDict from io import StringIO -import theano -from theano.configdefaults import config -from theano.graph import toolbox, utils -from theano.graph.basic import Apply, Constant, Variable, applys_between -from theano.graph.basic import as_string as graph_as_string -from theano.graph.basic import clone as clone_graph -from theano.graph.basic import clone_get_equiv, io_toposort, vars_between -from theano.graph.utils import TestValueError, get_variable_trace_string -from theano.misc.ordered_set import OrderedSet +import aesara +from aesara.configdefaults import config +from aesara.graph import toolbox, utils +from aesara.graph.basic import Apply, Constant, Variable, applys_between +from aesara.graph.basic import as_string as graph_as_string +from aesara.graph.basic import clone as clone_graph +from aesara.graph.basic import clone_get_equiv, io_toposort, vars_between +from aesara.graph.utils import TestValueError, get_variable_trace_string +from aesara.misc.ordered_set import OrderedSet class InconsistencyError(Exception): @@ -42,13 +42,13 @@ def __init__(self, *args, **kwargs): class FunctionGraph(utils.MetaObject): """ A `FunctionGraph` represents a subgraph bound by a set of input variables and - a set of output variables, ie a subgraph that specifies a theano function. + a set of output variables, ie a subgraph that specifies an Aesara function. The inputs list should contain all the inputs on which the outputs depend. `Variable`s of type `Constant` are not counted as inputs. 
The `FunctionGraph` supports the replace operation which allows to replace a variable in the subgraph by another, e.g. replace ``(x + x).out`` by ``(2 - * x).out``. This is the basis for optimization in Theano. + * x).out``. This is the basis for optimization in Aesara. This class is also responsible for verifying that a graph is valid (ie, all the dtypes and broadcast patterns are compatible with the @@ -97,14 +97,14 @@ def __init__(self, inputs, outputs, features=None, clone=True, update_mapping=No Parameters ---------- - inputs : list of theano.graph.basic.Variable + inputs : list of aesara.graph.basic.Variable Inputs nodes of the graph, usually declared by the user - outputs : list of theano.graph.basic.Variable + outputs : list of aesara.graph.basic.Variable Outputs nodes of the graph. clone : boolean If true, we will clone the graph. This is useful to remove the constant cache problem. - features : list of theano.graph.toolbox.Feature + features : list of aesara.graph.toolbox.Feature A list of features to be added to the `FunctionGraph`. update_mapping : dict Mapping between the inputs with updates and the outputs @@ -171,7 +171,7 @@ def add_input(self, var, check=True): Parameters ---------- - var : theano.graph.basic.Variable + var : aesara.graph.basic.Variable """ if check and var in self.inputs: @@ -186,7 +186,7 @@ def setup_var(self, var): Parameters ---------- - var : theano.graph.basic.Variable + var : aesara.graph.basic.Variable """ self.clients.setdefault(var, []) @@ -196,7 +196,7 @@ def setup_node(self, node): Parameters ---------- - node : theano.graph.basic.Apply + node : aesara.graph.basic.Apply """ if hasattr(node.op, "view_map") and not all( @@ -320,7 +320,7 @@ def import_var(self, var, reason): Parameters: ---------- - variable : theano.graph.basic.Variable + variable : aesara.graph.basic.Variable The variable to be imported. reason : str The name of the optimization or operation in progress. @@ -333,7 +333,7 @@ def import_var(self, var, reason): and not isinstance(var, Constant) and var not in self.inputs ): - from theano.graph.null_type import NullType + from aesara.graph.null_type import NullType if isinstance(var.type, NullType): raise TypeError( @@ -348,7 +348,7 @@ def import_node(self, apply_node, check=True, reason=None): Parameters: ---------- - apply_node : theano.graph.basic.Apply + apply_node : aesara.graph.basic.Apply The node to be imported. check : bool Check that the inputs for the imported nodes are also present in @@ -377,7 +377,7 @@ def import_node(self, apply_node, check=True, reason=None): f"Input {int(node.inputs.index(var))} of the graph (indices start " f"from 0), used to compute {node}, was not " "provided and not given a value. Use the " - "Theano flag exception_verbosity='high', " + "Aesara flag exception_verbosity='high', " "for more information on this error." ) raise MissingInputError(error_msg, variable=var) @@ -410,13 +410,13 @@ def change_input(self, node, i, new_var, reason=None): Parameters ---------- - node : theano.graph.basic.Apply or str + node : aesara.graph.basic.Apply or str The node for which an input is to be changed. If the value is the string ``"output"`` then the ``self.outputs`` will be used instead of ``node.inputs``. i : int The index in `node.inputs` that we want to change. - new_var : theano.graph.basic.Variable + new_var : aesara.graph.basic.Variable The new variable to take the place of ``node.inputs[i]``. 
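`FunctionGraph.replace` (built on the `change_input` method above) is the primitive that optimizations use; a hedged construction sketch with illustrative variables:

.. code-block:: python

    # Hedged sketch: replace a subexpression inside a FunctionGraph.
    import aesara.tensor as tt
    from aesara.graph.fg import FunctionGraph

    x = tt.dvector("x")
    y = tt.tanh(x)
    out = tt.sqr(y)

    fgraph = FunctionGraph([x], [out], clone=False)
    fgraph.replace(y, 2 * x, reason="demo")   # out now computes sqr(2 * x)
    print(fgraph.outputs[0].owner.inputs[0].owner.op)   # the mul Elemwise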
""" @@ -461,9 +461,9 @@ def replace(self, var, new_var, reason=None, verbose=None): Parameters: ---------- - var : theano.graph.basic.Variable + var : aesara.graph.basic.Variable The variable to be replaced. - new_var : theano.graph.basic.Variable + new_var : aesara.graph.basic.Variable The variable to replace `var`. reason : str The name of the optimization or operation in progress. @@ -482,7 +482,7 @@ def replace(self, var, new_var, reason=None, verbose=None): if new_var_2 is None or new_var_2.type != var.type: done = dict() used_ids = dict() - old = theano.compile.debugmode.debugprint( + old = aesara.compile.debugmode.debugprint( var, prefix=" ", depth=6, @@ -491,7 +491,7 @@ def replace(self, var, new_var, reason=None, verbose=None): print_type=True, used_ids=used_ids, ).getvalue() - new = theano.compile.debugmode.debugprint( + new = aesara.compile.debugmode.debugprint( new_var, prefix=" ", depth=6, @@ -520,8 +520,8 @@ def replace(self, var, new_var, reason=None, verbose=None): if config.compute_test_value != "off": try: - tval = theano.graph.op.get_test_value(var) - new_tval = theano.graph.op.get_test_value(new_var) + tval = aesara.graph.op.get_test_value(var) + new_tval = aesara.graph.op.get_test_value(new_var) except TestValueError: pass else: diff --git a/theano/graph/null_type.py b/aesara/graph/null_type.py similarity index 96% rename from theano/graph/null_type.py rename to aesara/graph/null_type.py index fa723b663e..7487253156 100644 --- a/theano/graph/null_type.py +++ b/aesara/graph/null_type.py @@ -1,4 +1,4 @@ -from theano.graph.type import Type +from aesara.graph.type import Type class NullType(Type): diff --git a/theano/graph/op.py b/aesara/graph/op.py similarity index 97% rename from theano/graph/op.py rename to aesara/graph/op.py index 4ff8756db4..786ad2695f 100644 --- a/theano/graph/op.py +++ b/aesara/graph/op.py @@ -29,24 +29,21 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.graph.basic import Apply, NoParams, Variable -from theano.graph.fg import FunctionGraph -from theano.graph.params_type import Params, ParamsType -from theano.graph.utils import ( +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Apply, NoParams, Variable +from aesara.graph.fg import FunctionGraph +from aesara.graph.params_type import Params, ParamsType +from aesara.graph.utils import ( MetaObject, MethodNotDefined, TestValueError, add_tag_trace, get_variable_trace_string, ) -from theano.link.c.interface import CLinkerOp +from aesara.link.c.interface import CLinkerOp -__authors__ = "theano-dev" "PyMC Developers" -__copyright__ = "(c) 2010, Universite de Montreal" - __docformat__ = "restructuredtext en" StorageMapType = List[Optional[List[Any]]] @@ -566,8 +563,8 @@ def make_c_thunk( """ # FIXME: Putting the following import on the module level causes an import cycle. # The conclusion should be that the antire "make_c_thunk" method should be defined - # in theano.link.c and dispatched onto the Op! - import theano.link.c.basic + # in aesara.link.c and dispatched onto the Op! 
+ import aesara.link.c.basic node_input_storage = [storage_map[r] for r in node.inputs] node_output_storage = [storage_map[r] for r in node.outputs] @@ -578,7 +575,7 @@ def make_c_thunk( for (new_o, old_o) in zip(e.outputs, node.outputs) if old_o in no_recycling ] - cl = theano.link.c.basic.CLinker().accept(e, no_recycling=e_no_recycling) + cl = aesara.link.c.basic.CLinker().accept(e, no_recycling=e_no_recycling) # float16 gets special treatment since running # unprepared C code will get bad results. if not getattr(self, "_f16_ok", False): @@ -654,7 +651,7 @@ def get_test_value(v: Variable) -> Any: """ if not isinstance(v, Variable): - v = theano.tensor.as_tensor_variable(v) + v = aesara.tensor.as_tensor_variable(v) return v.get_test_value() @@ -733,7 +730,7 @@ def get_test_values(*args: Variable) -> Union[Any, List[Any]]: ops_with_inner_function: Dict[Op, Text] = {} """ -Registry of Ops that have an inner compiled Theano function. +Registry of Ops that have an inner compiled Aesara function. The keys are Op classes (not instances), and values are the name of the attribute that contains the function. For instance, if the function is @@ -799,7 +796,7 @@ def c_headers(self, **kwargs): @staticmethod def test_gxx_support(): """Check if openMP is supported.""" - from theano.link.c.cmodule import GCC_compiler + from aesara.link.c.cmodule import GCC_compiler code = """ #include @@ -831,8 +828,8 @@ def update_self_openmp(self) -> NoReturn: "Your g++ compiler fails to compile OpenMP code. We" " know this happen with some version of the EPD mingw" " compiler and LLVM compiler on Mac OS X." - " We disable openmp everywhere in Theano." - " To remove this warning set the theano flags `openmp`" + " We disable openmp everywhere in Aesara." + " To remove this warning set the aesara flags `openmp`" " to False.", stacklevel=3, ) @@ -895,7 +892,7 @@ class ExternalCOp(COp): r"^#section ([a-zA-Z0-9_]+)$", re.MULTILINE ) backward_re: ClassVar[Pattern] = re.compile( - r"^THEANO_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE + r"^AESARA_(APPLY|SUPPORT)_CODE_SECTION$", re.MULTILINE ) # This is the set of allowed markers SECTIONS: ClassVar[Set[Text]] = { @@ -936,7 +933,7 @@ def __init__( self.func_name = func_name # Keep the original name. If we reload old pickle, we want to - # find the new path and new version of the file in Theano. + # find the new path and new version of the file in Aesara. self.func_files = func_files self.load_c_code(func_files) @@ -1032,12 +1029,12 @@ def __get_op_params(self) -> List[Text]: The names must be strings that are not a C keyword and the values must be strings of literal C representations. - If op uses a :class:`theano.graph.params_type.ParamsType` as ``params_type``, + If op uses a :class:`aesara.graph.params_type.ParamsType` as ``params_type``, it returns: - a default macro ``PARAMS_TYPE`` which defines the class name of the corresponding C struct. - a macro ``DTYPE_PARAM_key`` for every ``key`` in the ParamsType for which associated - type implements the method :func:`theano.graph.type.CLinkerType.c_element_type`. + type implements the method :func:`aesara.graph.type.CLinkerType.c_element_type`. ``DTYPE_PARAM_key`` defines the primitive C type name of an item in a variable associated to ``key``. 
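The test-value helpers renamed in `aesara/graph/op.py` above (`get_test_value`/`get_test_values`) look up `tag.test_value` on a variable; a tiny hedged sketch:

.. code-block:: python

    # Hedged sketch: attach and read back a test value.
    import numpy as np
    import aesara.tensor as tt
    from aesara.graph.op import get_test_value

    x = tt.dmatrix("x")
    x.tag.test_value = np.zeros((2, 3))
    print(get_test_value(x).shape)   # -> (2, 3)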
@@ -1049,7 +1046,7 @@ def __get_op_params(self) -> List[Text]: c_type = wrapper.types[i].c_element_type() if c_type: # NB (reminder): These macros are currently used only in ParamsType example test - # (`theano/graph/tests/test_quadratic_function.c`), to demonstrate how we can + # (`aesara/graph/tests/test_quadratic_function.c`), to demonstrate how we can # access params dtypes when dtypes may change (e.g. if based on config.floatX). # But in practice, params types generally have fixed types per op. params.append( diff --git a/theano/graph/opt.py b/aesara/graph/opt.py similarity index 98% rename from theano/graph/opt.py rename to aesara/graph/opt.py index 7a4175b7b7..eb1fbfb01f 100644 --- a/theano/graph/opt.py +++ b/aesara/graph/opt.py @@ -19,11 +19,11 @@ import numpy as np -import theano -from theano.assert_op import Assert, assert_op -from theano.configdefaults import config -from theano.graph import destroyhandler as dh -from theano.graph.basic import ( +import aesara +from aesara.assert_op import Assert, assert_op +from aesara.configdefaults import config +from aesara.graph import destroyhandler as dh +from aesara.graph.basic import ( Apply, Constant, Variable, @@ -31,15 +31,15 @@ io_toposort, nodes_constructed, ) -from theano.graph.fg import InconsistencyError -from theano.graph.op import Op -from theano.graph.toolbox import Feature, NodeFinder -from theano.graph.utils import AssocList -from theano.misc.ordered_set import OrderedSet -from theano.utils import flatten +from aesara.graph.fg import InconsistencyError +from aesara.graph.op import Op +from aesara.graph.toolbox import Feature, NodeFinder +from aesara.graph.utils import AssocList +from aesara.misc.ordered_set import OrderedSet +from aesara.utils import flatten -_logger = logging.getLogger("theano.graph.opt") +_logger = logging.getLogger("aesara.graph.opt") _optimizer_idx = [0] @@ -1114,10 +1114,10 @@ def transform(self, fgraph, node, *args, **kwargs): givens = {} missing = set() for input in node.inputs: - if isinstance(input, theano.compile.SharedVariable): + if isinstance(input, aesara.compile.SharedVariable): pass elif hasattr(input.tag, "test_value"): - givens[input] = theano.shared( + givens[input] = aesara.shared( input.type.filter(input.tag.test_value), input.name, broadcastable=input.broadcastable, @@ -1147,7 +1147,7 @@ def transform(self, fgraph, node, *args, **kwargs): outputs = opt.transform(fgraph, node, *args, **kwargs) if outputs: try: - fn = theano.function( + fn = aesara.function( [], outputs, givens=givens, on_unused_input="ignore" ) fn.trust_input = True @@ -1678,7 +1678,7 @@ def transform(self, fgraph, node, get_nodes=True): constructs out_pattern and performs the replacement. """ - from theano.graph import unify + from aesara.graph import unify if get_nodes and self.get_nodes is not None: for real_node in self.get_nodes(fgraph, node): @@ -1749,7 +1749,7 @@ def retry_with_equiv(): else: u = u.merge(expr, v) elif isinstance(pattern, (int, float)) and isinstance(expr, Constant): - if np.all(theano.tensor.constant(pattern).value == expr.value): + if np.all(aesara.tensor.constant(pattern).value == expr.value): return u else: return retry_with_equiv() @@ -2598,7 +2598,7 @@ def chin(node, i, r, new_r, reason): msg = ( f"EquilibriumOptimizer max'ed out by '{opt_name}'" + ". You can safely raise the current threshold of " - + "{config.optdb__max_use_ratio:f} with the theano flag 'optdb__max_use_ratio'." + + "{config.optdb__max_use_ratio:f} with the aesara flag 'optdb__max_use_ratio'." 
) if config.on_opt_error == "raise": raise AssertionError(msg) @@ -3084,15 +3084,15 @@ def check_stack_trace(f_or_fgraph, ops_to_check="last", bug_print="raise"): Parameters ---------- - f_or_fgraph: theano.compile.function.types.Function or - theano.graph.fg.FunctionGraph + f_or_fgraph: aesara.compile.function.types.Function or + aesara.graph.fg.FunctionGraph The compiled function or the function graph to be analysed. ops_to_check: it can be of four different types: - - classes or instances inheriting from theano.graph.op.Op - - tuple/list of classes or instances inheriting from theano.graph.op.Op + - classes or instances inheriting from aesara.graph.op.Op + - tuple/list of classes or instances inheriting from aesara.graph.op.Op - string - function returning a boolean and taking as input an instance of - theano.graph.op.Op. + aesara.graph.op.Op. - if ops_to_check is a string, it should be either 'last' or 'all'. 'last' will check only the last op of the graph while 'all' will check all the ops of the graph. @@ -3100,7 +3100,7 @@ def check_stack_trace(f_or_fgraph, ops_to_check="last", bug_print="raise"): check that all the outputs of their occurrences in the graph have a stack trace. - if ops_to_check is a function, it should take as input a - theano.graph.op.Op and return a boolean indicating if the input op should + aesara.graph.op.Op and return a boolean indicating if the input op should be checked or not. bug_print: string belonging to {'raise', 'warn', 'ignore'} You can specify the behaviour of the function when the specified @@ -3113,9 +3113,9 @@ def check_stack_trace(f_or_fgraph, ops_to_check="last", bug_print="raise"): True if the outputs of the specified ops have a stack, False otherwise. """ - if isinstance(f_or_fgraph, theano.compile.function.types.Function): + if isinstance(f_or_fgraph, aesara.compile.function.types.Function): fgraph = f_or_fgraph.maker.fgraph - elif isinstance(f_or_fgraph, theano.graph.fg.FunctionGraph): + elif isinstance(f_or_fgraph, aesara.graph.fg.FunctionGraph): fgraph = f_or_fgraph else: raise ValueError("The type of f_or_fgraph is not supported") diff --git a/theano/graph/optdb.py b/aesara/graph/optdb.py similarity index 98% rename from theano/graph/optdb.py rename to aesara/graph/optdb.py index 9df91977bc..9ee9e46537 100644 --- a/theano/graph/optdb.py +++ b/aesara/graph/optdb.py @@ -3,10 +3,10 @@ import sys from io import StringIO -from theano.configdefaults import config -from theano.graph import opt -from theano.misc.ordered_set import OrderedSet -from theano.utils import DefaultOrderedDict +from aesara.configdefaults import config +from aesara.graph import opt +from aesara.misc.ordered_set import OrderedSet +from aesara.utils import DefaultOrderedDict class DB: @@ -339,7 +339,7 @@ class SequenceDB(DB): No matter which optimizations are selected by a query, they are carried out in order of increasing position. - The optdb itself (`theano.compile.mode.optdb`), from which (among many + The optdb itself (`aesara.compile.mode.optdb`), from which (among many other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB. """ diff --git a/theano/graph/params_type.py b/aesara/graph/params_type.py similarity index 92% rename from theano/graph/params_type.py rename to aesara/graph/params_type.py index fb4aaebfc0..402a6607b4 100644 --- a/theano/graph/params_type.py +++ b/aesara/graph/params_type.py @@ -1,12 +1,12 @@ """ Module for wrapping many Op parameters into one object available in both Python and C code. 
-The module provides the main public class :class:`ParamsType` that allows to bundle many Theano types +The module provides the main public class :class:`ParamsType` that allows to bundle many Aesara types into one parameter type, and an internal convenient class :class:`Params` which will be automatically used to create a Params object that is compatible with the ParamsType defined. The Params object will be available in both Python code (as a standard Python object) and C code -(as a specific struct with parameters as struct fields). To be fully-available in C code, Theano +(as a specific struct with parameters as struct fields). To be fully-available in C code, Aesara types wrapped into a ParamsType must provide a C interface (e.g. TensorType, Scalar, GpuArrayType, or your own type. See :ref:`extending_op_params` for more details). @@ -18,12 +18,12 @@ .. code-block:: python # Import ParamsType class. - from theano.graph.params_type import ParamsType + from aesara.graph.params_type import ParamsType # If you want to use a tensor and a scalar as parameters, - # you should import required Theano types. - from theano.tensor.type import TensorType - from theano.scalar import Scalar + # you should import required Aesara types. + from aesara.tensor.type import TensorType + from aesara.scalar import Scalar In your Op sub-class: @@ -60,22 +60,22 @@ def __init__(value_attr1, value_attr2): /* You won't need to free them or whatever else. */ -See :class:`QuadraticOpFunc` and :class:`QuadraticCOpFunc` in ``theano/graph/tests/test_params_type.py`` +See :class:`QuadraticOpFunc` and :class:`QuadraticCOpFunc` in ``aesara/graph/tests/test_params_type.py`` for complete working examples. -Combining ParamsType with Theano enumeration types +Combining ParamsType with Aesara enumeration types -------------------------------------------------- -Theano provide some enumeration types that allow to create constant primitive values (integer and floating values) -available in both Python and C code. See :class:`theano.graph.type.EnumType` and its subclasses for more details. +Aesara provide some enumeration types that allow to create constant primitive values (integer and floating values) +available in both Python and C code. See :class:`aesara.graph.type.EnumType` and its subclasses for more details. -If your ParamsType contains Theano enumeration types, then constants defined inside these +If your ParamsType contains Aesara enumeration types, then constants defined inside these enumerations will be directly available as ParamsType attributes. **Example**:: - from theano.graph.params_type import ParamsType - from theano.graph.type import EnumType, EnumList + from aesara.graph.params_type import ParamsType + from aesara.graph.type import EnumType, EnumList wrapper = ParamsType(enum1=EnumList('CONSTANT_1', 'CONSTANT_2', 'CONSTANT_3'), enum2=EnumType(PI=3.14, EPSILON=0.001)) @@ -99,7 +99,7 @@ def __init__(value_attr1, value_attr2): enum2=EnumType(CONSTANT_1=0, CONSTANT_3=5)) If your enum types contain constant aliases, you can retrive them from ParamsType -with ``ParamsType.enum_from_alias(alias)`` method (see :class:`theano.graph.type.EnumType` +with ``ParamsType.enum_from_alias(alias)`` method (see :class:`aesara.graph.type.EnumType` for more info about enumeration aliases). .. 
code-block:: python @@ -117,8 +117,8 @@ def __init__(value_attr1, value_attr2): import hashlib import re -from theano.graph.type import CType, EnumType -from theano.graph.utils import MethodNotDefined +from aesara.graph.type import CType, EnumType +from aesara.graph.utils import MethodNotDefined # Set of C and C++ keywords as defined (at March 2nd, 2017) in the pages below: @@ -235,8 +235,8 @@ class Params(dict): .. code-block:: python - from theano.graph.params_type import ParamsType, Params - from theano.scalar import Scalar + from aesara.graph.params_type import ParamsType, Params + from aesara.scalar import Scalar # You must create a ParamsType first: params_type = ParamsType(attr1=Scalar('int32'), key2=Scalar('float32'), @@ -318,12 +318,12 @@ def __ne__(self, other): class ParamsType(CType): """ - This class can create a struct of Theano types (like `TensorType`, + This class can create a struct of Aesara types (like `TensorType`, `GpuArrayType`, etc.) to be used as a convenience op parameter wrapping many data. `ParamsType` constructor takes key-value args. Key will be the name of the - attribute in the struct. Value is the Theano type of this attribute, + attribute in the struct. Value is the Aesara type of this attribute, ie. an instance of (a subclass of) :class:`CType` (eg. ``TensorType('int64', (False,))``). @@ -363,7 +363,7 @@ def __init__(self, **kwargs): type_name = type_instance.__class__.__name__ if not isinstance(type_instance, CType): raise TypeError( - 'ParamsType: attribute "%s" should inherit from Theano CType, got "%s".' + 'ParamsType: attribute "%s" should inherit from Aesara CType, got "%s".' % (attribute_name, type_name) ) @@ -458,47 +458,47 @@ def generate_struct_name(self): types_hex = hashlib.sha256(types_string).hexdigest() return f"_Params_{fields_hex}_{types_hex}" - def has_type(self, theano_type): + def has_type(self, aesara_type): """ - Return True if current ParamsType contains the specified Theano type. + Return True if current ParamsType contains the specified Aesara type. """ - return theano_type in self.types + return aesara_type in self.types def get_type(self, field_name): """ - Return the Theano type associated to the given field name + Return the Aesara type associated to the given field name in the current ParamsType. """ return self.types[self.fields.index(field_name)] - def get_field(self, theano_type): + def get_field(self, aesara_type): """ Return the name (string) of the first field associated to - the given Theano type. Fields are sorted in lexicographic - order. Raise an exception if this Theano type is not + the given Aesara type. Fields are sorted in lexicographic + order. Raise an exception if this Aesara type is not in the current ParamsType. This method is intended to be used to retrieve a field name when we know that current ParamsType contains the given - Theano type only once. + Aesara type only once. """ - return self.fields[self.types.index(theano_type)] + return self.fields[self.types.index(aesara_type)] def get_enum(self, key): """ - Look for a constant named ``key`` in the Theano enumeration types + Look for a constant named ``key`` in the Aesara enumeration types wrapped into current ParamsType. Return value of the constant found, or raise an exception if either the constant is not found or - current wrapper does not contain any Theano enumeration type. + current wrapper does not contain any Aesara enumeration type. 
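Putting the pieces above together, a hedged sketch of a `ParamsType` that mixes a scalar field with an enumeration (field and constant names are illustrative):

.. code-block:: python

    # Hedged sketch: ParamsType combining a Scalar with an EnumList.
    from aesara.graph.params_type import ParamsType
    from aesara.graph.type import EnumList
    from aesara.scalar import Scalar

    wrapper = ParamsType(
        attr=Scalar("int32"),
        units=EnumList("MM", "CM", "M"),   # constants usable from Python and C
    )

    print(wrapper.fields)           # ('attr', 'units'), sorted lexicographically
    print(wrapper.get_enum("CM"))   # -> 1
    print(wrapper.MM, wrapper.M)    # enum constants exposed as attributes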
**Example**:: - from theano.graph.params_type import ParamsType - from theano.graph.type import EnumType, EnumList - from theano.scalar import Scalar + from aesara.graph.params_type import ParamsType + from aesara.graph.type import EnumType, EnumList + from aesara.scalar import Scalar wrapper = ParamsType(scalar=Scalar('int32'), letters=EnumType(A=1, B=2, C=3), @@ -515,19 +515,19 @@ def get_enum(self, key): def enum_from_alias(self, alias): """ - Look for a constant that has alias ``alias`` in the Theano enumeration types + Look for a constant that has alias ``alias`` in the Aesara enumeration types wrapped into current ParamsType. Return value of the constant found, or raise an exception if either 1. there is no constant with this alias, 2. there is no constant which name is this alias, or - 3. current wrapper does not contain any Theano enumeration type. + 3. current wrapper does not contain any Aesara enumeration type. **Example**:: - from theano.graph.params_type import ParamsType - from theano.graph.type import EnumType, EnumList - from theano.scalar import Scalar + from aesara.graph.params_type import ParamsType + from aesara.graph.type import EnumType, EnumList + from aesara.scalar import Scalar wrapper = ParamsType(scalar=Scalar('int32'), letters=EnumType(A=(1, 'alpha'), B=(2, 'beta'), C=3), @@ -572,9 +572,9 @@ def get_params(self, *objects, **kwargs): **Example**:: import numpy - from theano.graph.params_type import ParamsType - from theano.tensor.type import dmatrix - from theano.scalar import Scalar + from aesara.graph.params_type import ParamsType + from aesara.tensor.type import dmatrix + from aesara.scalar import Scalar class MyObject: def __init__(self): diff --git a/theano/graph/sandbox/typeattr.txt b/aesara/graph/sandbox/typeattr.txt similarity index 100% rename from theano/graph/sandbox/typeattr.txt rename to aesara/graph/sandbox/typeattr.txt diff --git a/theano/graph/sched.py b/aesara/graph/sched.py similarity index 97% rename from theano/graph/sched.py rename to aesara/graph/sched.py index 0633c5d214..f56189f70a 100644 --- a/theano/graph/sched.py +++ b/aesara/graph/sched.py @@ -1,7 +1,7 @@ from collections import defaultdict -from theano.graph.basic import list_of_nodes -from theano.utils import cmp +from aesara.graph.basic import list_of_nodes +from aesara.utils import cmp # {{{ http://code.activestate.com/recipes/578231/ (r1) @@ -233,9 +233,9 @@ def sort_apply_nodes(inputs, outputs, cmps): Examples -------- - >>> from theano.graph.basic import sort_apply_nodes, dependence - >>> from theano.tensor.type import matrix - >>> from theano.tensor.math import dot + >>> from aesara.graph.basic import sort_apply_nodes, dependence + >>> from aesara.tensor.type import matrix + >>> from aesara.tensor.math import dot >>> x = matrix('x') >>> y = dot(x*2, x+1) >>> str_cmp = lambda a, b: cmp(str(a), str(b)) # lexicographical sort diff --git a/theano/graph/toolbox.py b/aesara/graph/toolbox.py similarity index 97% rename from theano/graph/toolbox.py rename to aesara/graph/toolbox.py index 06450df98f..3cb1147c7c 100644 --- a/theano/graph/toolbox.py +++ b/aesara/graph/toolbox.py @@ -8,9 +8,9 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.graph.basic import ( +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import ( equal_computations, graph_inputs, io_toposort, @@ -236,7 +236,7 @@ class Feature: See Also -------- - theano.graph.toolbox : for common extensions. + aesara.graph.toolbox : for common extensions. 
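A toy `Feature` following the callback protocol documented above; this is a hedged illustration (the class and the `replacements` attribute are invented for the example), not a feature shipped with the library:

.. code-block:: python

    # Hedged sketch: a Feature that records replacements made on the graph.
    import aesara.tensor as tt
    from aesara.graph.fg import FunctionGraph
    from aesara.graph.toolbox import Feature

    class ReplacementLogger(Feature):
        def on_attach(self, fgraph):
            fgraph.replacements = []

        def on_change_input(self, fgraph, node, i, old_var, new_var, reason=None):
            fgraph.replacements.append((old_var, new_var, reason))

    x = tt.dvector("x")
    out = tt.sqr(tt.tanh(x))
    fgraph = FunctionGraph([x], [out], clone=False, features=[ReplacementLogger()])
    fgraph.replace(out.owner.inputs[0], 2 * x, reason="demo")
    print(fgraph.replacements)      # one recorded replacement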
""" @@ -534,7 +534,7 @@ def replace_all_validate(self, fgraph, replacements, reason=None, verbose=None): if verbose is None: verbose = config.optimizer_verbose if config.scan__debug: - from theano.scan.op import Scan + from aesara.scan.op import Scan scans = [n for n in fgraph.apply_nodes if isinstance(n.op, Scan)] @@ -551,7 +551,6 @@ def replace_all_validate(self, fgraph, replacements, reason=None, verbose=None): # So don't revert as this raise a different error # that isn't helpful. e.args += ( - "Please, report this to theano-dev mailing list." " As a temporary work around, you can raise Python" " stack limit with:" " import sys; sys.setrecursionlimit(10000)", @@ -578,7 +577,7 @@ def replace_all_validate(self, fgraph, replacements, reason=None, verbose=None): print(f"validate failed on node {r}.\n Reason: {reason}, {e}") raise if config.scan__debug: - from theano.scan.op import Scan + from aesara.scan.op import Scan scans2 = [n for n in fgraph.apply_nodes if isinstance(n.op, Scan)] nb = len(scans) @@ -619,9 +618,6 @@ def replace_all_validate_remove( "An optimization wanted to replace a Variable" " in the graph, but the replacement for it doesn't" " remove it. We disabled the optimization." - " Your function runs correctly, but it would be" - " appreciated if you submit this problem to the" - " mailing list theano-users so that we can fix it." f"{reason}: {replacements}", ) raise ReplacementDidNotRemoveError() @@ -639,7 +635,7 @@ def on_import(self, fgraph, node, reason): def validate(self, fgraph): if self.fail_validate: self.fail_validate = False - raise theano.graph.fg.InconsistencyError( + raise aesara.graph.fg.InconsistencyError( "Trying to reintroduce a removed node" ) @@ -786,7 +782,7 @@ def validate(self, fgraph): op = node.op out_idx = node.outputs.index(out) if hasattr(op, "destroy_map") and out_idx in op.destroy_map: - raise theano.graph.fg.InconsistencyError( + raise aesara.graph.fg.InconsistencyError( "A function graph Feature has requested that outputs of the graph " "be prevented from being the result of in-place " f"operations. This has prevented the output {out} from " @@ -796,12 +792,12 @@ def validate(self, fgraph): def is_same_graph_with_merge(var1, var2, givens=None): """ - Merge-based implementation of `theano.graph.basic.is_same_graph`. + Merge-based implementation of `aesara.graph.basic.is_same_graph`. - See help on `theano.graph.basic.is_same_graph` for additional documentation. + See help on `aesara.graph.basic.is_same_graph` for additional documentation. """ - from theano.graph.opt import MergeOptimizer + from aesara.graph.opt import MergeOptimizer if givens is None: givens = {} @@ -813,7 +809,7 @@ def is_same_graph_with_merge(var1, var2, givens=None): inputs = list(graph_inputs(vars)) # The clone isn't needed as we did a deepcopy and we cloning will # break the mapping in givens. - fgraph = theano.graph.fg.FunctionGraph(inputs, vars, clone=False) + fgraph = aesara.graph.fg.FunctionGraph(inputs, vars, clone=False) # Perform Variable substitution. for to_replace, replace_by in givens.items(): fgraph.replace(to_replace, replace_by) @@ -855,7 +851,7 @@ def is_same_graph(var1, var2, givens=None): var2 The second Variable to compare. givens - Similar to the `givens` argument of `theano.function`, it can be used + Similar to the `givens` argument of `aesara.function`, it can be used to perform substitutions in the computational graph of `var1` and `var2`. 
This argument is associated to neither `var1` nor `var2`: substitutions may affect both graphs if the substituted variable diff --git a/theano/graph/type.py b/aesara/graph/type.py similarity index 97% rename from theano/graph/type.py rename to aesara/graph/type.py index 4e92d0af54..4bafb5aaca 100644 --- a/theano/graph/type.py +++ b/aesara/graph/type.py @@ -6,11 +6,11 @@ from abc import abstractmethod from typing import Any, NoReturn, Optional, Text, TypeVar, Union -from theano.graph import utils -from theano.graph.basic import Constant, Variable -from theano.graph.utils import MetaObject -from theano.link.c.interface import CLinkerType -from theano.utils import Singleton +from aesara.graph import utils +from aesara.graph.basic import Constant, Variable +from aesara.graph.utils import MetaObject +from aesara.link.c.interface import CLinkerType +from aesara.utils import Singleton __docformat__ = "restructuredtext en" @@ -204,7 +204,7 @@ def values_eq(cls, a: Any, b: Any) -> bool: def values_eq_approx(cls, a: Any, b: Any): """Return ``True`` if `a` and `b` can be considered approximately equal. - This function is used by Theano debugging tools to decide + This function is used by Aesara debugging tools to decide whether two values are equivalent, admitting a certain amount of numerical instability. For example, for floating-point numbers this function should be an approximate comparison. @@ -241,7 +241,7 @@ def values_eq_approx(cls, a: Any, b: Any): class CType(Type, CLinkerType): """Convenience wrapper combining `Type` and `CLinkerType`. - Theano comes with several subclasses of such as: + Aesara comes with several subclasses of such as: - `Generic`: for any python type @@ -262,13 +262,13 @@ class CType(Type, CLinkerType): # Create a second Variable with the same Type instance c = tensor.fvector() - Whenever you create a symbolic variable in theano (technically, + Whenever you create a symbolic variable in aesara (technically, `Variable`) it will contain a reference to a Type instance. That reference is typically constant during the lifetime of the Variable. Many variables can refer to a single Type instance, as do b and c above. The Type instance defines the kind of value which might end up in that variable when executing a `Function`. - In this sense, theano is like a strongly-typed language because + In this sense, aesara is like a strongly-typed language because the types are included in the graph before the values. In our example above, b is a Variable which is guaranteed to correspond to a numpy.ndarray of rank 1 when we try to do some computations @@ -354,7 +354,7 @@ class CDataType(CType): Represents opaque C data to be passed around. The intent is to ease passing arbitrary data between ops C code. - The constructor builds a type made to represent a C pointer in theano. + The constructor builds a type made to represent a C pointer in aesara. Parameters ---------- @@ -366,7 +366,7 @@ class CDataType(CType): have a `void` return and take a single pointer argument. version - The version to use in Theano cache system. + The version to use in Aesara cache system. """ __props__ = ( @@ -599,7 +599,7 @@ class EnumType(CType, dict): .. code-block:: python - from theano.graph.type import EnumType + from aesara.graph.type import EnumType # You can remark that constant 'C' does not have an alias. enum = EnumType(A=('alpha', 1), B=('beta', 2), C=3, D=('delta', 4)) @@ -795,20 +795,20 @@ def c_to_string(self): .. 
code-block:: c - int theano_enum_to_string_( value, char* output_string); + int aesara_enum_to_string_( value, char* output_string); - Where ``ctype`` and ``cname`` are the C type and the C name of current Theano enumeration. + Where ``ctype`` and ``cname`` are the C type and the C name of current Aesara enumeration. ``output_string`` should be large enough to contain the longest name in this enumeration. If given value is unknown, the C function sets a Python ValueError exception and returns a non-zero. This C function may be useful to retrieve some runtime informations. - It is available in C code when theano flag ``config.cmodule__debug`` is set to ``True``. + It is available in C code when aesara flag ``config.cmodule__debug`` is set to ``True``. """ return """ #ifdef DEBUG - int theano_enum_to_string_%(cname)s(%(ctype)s in, char* out) { + int aesara_enum_to_string_%(cname)s(%(ctype)s in, char* out) { int ret = 0; switch(in) { %(cases)s diff --git a/theano/graph/unify.py b/aesara/graph/unify.py similarity index 99% rename from theano/graph/unify.py rename to aesara/graph/unify.py index b94105bb2c..998e02694a 100644 --- a/theano/graph/unify.py +++ b/aesara/graph/unify.py @@ -137,12 +137,12 @@ class Variable: Notes ----- - There are two Variable classes in theano and this is the more rarely used + There are two Variable classes in aesara and this is the more rarely used one. This class is used internally by the PatternSub optimization, and possibly other subroutines that have to perform graph queries. If that doesn't sound like what you're doing, the Variable class you - want is probably theano.graph.basic.Variable. + want is probably aesara.graph.basic.Variable. """ diff --git a/theano/graph/utils.py b/aesara/graph/utils.py similarity index 94% rename from theano/graph/utils.py rename to aesara/graph/utils.py index 8e474a866d..c1b0689ac6 100644 --- a/theano/graph/utils.py +++ b/aesara/graph/utils.py @@ -84,7 +84,7 @@ def add_tag_trace(thing, user_line=None): we look. 
""" - from theano.configdefaults import config + from aesara.configdefaults import config if user_line is None: user_line = config.traceback__limit @@ -92,22 +92,22 @@ def add_tag_trace(thing, user_line=None): if user_line == -1: user_line = None skips = [ - "theano/tensor/", - "theano\\tensor\\", - "theano/compile/", - "theano\\compile\\", - "theano/graph/", - "theano\\graph\\", - "theano/scalar/basic.py", - "theano\\scalar\\basic.py", - "theano/sandbox/", - "theano\\sandbox\\", - "theano/scan/", - "theano\\scan\\", - "theano/sparse/", - "theano\\sparse\\", - "theano/typed_list/", - "theano\\typed_list\\", + "aesara/tensor/", + "aesara\\tensor\\", + "aesara/compile/", + "aesara\\compile\\", + "aesara/graph/", + "aesara\\graph\\", + "aesara/scalar/basic.py", + "aesara\\scalar\\basic.py", + "aesara/sandbox/", + "aesara\\sandbox\\", + "aesara/scan/", + "aesara\\scan\\", + "aesara/sparse/", + "aesara\\sparse\\", + "aesara/typed_list/", + "aesara\\typed_list\\", ] if config.traceback__compile_limit > 0: @@ -246,7 +246,7 @@ def __repr__(self): return "scratchpad" + str(self.__dict__) def info(self): - print(f"") + print(f"") for k, v in self.__dict__.items(): print(f" {k}: {v}") diff --git a/theano/ifelse.py b/aesara/ifelse.py similarity index 97% rename from theano/ifelse.py rename to aesara/ifelse.py index 49b74073ac..89b76cc262 100644 --- a/theano/ifelse.py +++ b/aesara/ifelse.py @@ -1,5 +1,5 @@ """ -IfElse introduces lazy evaluation in Theano (coupled with the CVM/VM +IfElse introduces lazy evaluation in Aesara (coupled with the CVM/VM linkers). It resembles the if clause of any programming language, that has a `then` and `else` branch, and executes either one or the other according to the condition provided. @@ -16,15 +16,15 @@ import numpy as np -import theano.tensor as tt -from theano.compile import optdb -from theano.configdefaults import config -from theano.graph.basic import Apply, Variable, clone_replace, is_in_ancestors -from theano.graph.op import _NoPythonOp -from theano.graph.opt import GlobalOptimizer, in2out, local_optimizer -from theano.tensor import basic -from theano.tensor.shape import Reshape, Shape, SpecifyShape -from theano.tensor.type import TensorType +import aesara.tensor as tt +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.graph.basic import Apply, Variable, clone_replace, is_in_ancestors +from aesara.graph.op import _NoPythonOp +from aesara.graph.opt import GlobalOptimizer, in2out, local_optimizer +from aesara.tensor import basic +from aesara.tensor.shape import Reshape, Shape, SpecifyShape +from aesara.tensor.type import TensorType __docformat__ = "restructedtext en" @@ -38,7 +38,7 @@ __copyright__ = "(c) 2010, Universite de Montreal" __contact__ = "Razvan Pascanu " -_logger = logging.getLogger("theano.ifelse") +_logger = logging.getLogger("aesara.ifelse") class IfElse(_NoPythonOp): @@ -318,24 +318,24 @@ def ifelse(condition, then_branch, else_branch, name=None): If it evaluates to 0 it corresponds to False, anything else stands for True. - :type then_branch: list of theano expressions/ theano expression + :type then_branch: list of aesara expressions/ aesara expression :param then_branch: - A single theano variable or a list of theano variables that the + A single aesara variable or a list of aesara variables that the function should return as the output if ``condition`` evaluates to true. 
The number of variables should match those in the ``else_branch``, and there should be a one to one correspondance (type wise) with the tensors provided in the else branch - :type else_branch: list of theano expressions/ theano expressions + :type else_branch: list of aesara expressions/ aesara expressions :param else_branch: - A single theano variable or a list of theano variables that the + A single aesara variable or a list of aesara variables that the function should return as the output if ``condition`` evaluates to false. The number of variables should match those in the then branch, and there should be a one to one correspondace (type wise) with the tensors provided in the then branch. :return: - A list of theano variables or a single variable (depending on the + A list of aesara variables or a single variable (depending on the nature of the ``then_branch`` and ``else_branch``). More exactly if ``then_branch`` and ``else_branch`` is a tensor, then the return variable will be just a single variable, otherwise a @@ -600,7 +600,7 @@ class CondMerge(GlobalOptimizer): """ Graph Optimizer that merges different cond ops """ def add_requirements(self, fgraph): - from theano.graph.toolbox import ReplaceValidate + from aesara.graph.toolbox import ReplaceValidate fgraph.add_feature(ReplaceValidate()) diff --git a/theano/link/__init__.py b/aesara/link/__init__.py similarity index 100% rename from theano/link/__init__.py rename to aesara/link/__init__.py diff --git a/theano/link/basic.py b/aesara/link/basic.py similarity index 98% rename from theano/link/basic.py rename to aesara/link/basic.py index 2d47caf3a8..459aa732a0 100644 --- a/theano/link/basic.py +++ b/aesara/link/basic.py @@ -1,12 +1,12 @@ import typing from copy import copy, deepcopy -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.fg import FunctionGraph -from theano.graph.type import CType -from theano.link.utils import gc_helper, map_storage, raise_with_op, streamline -from theano.utils import deprecated, difference, to_return_values +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.fg import FunctionGraph +from aesara.graph.type import CType +from aesara.link.utils import gc_helper, map_storage, raise_with_op, streamline +from aesara.utils import deprecated, difference, to_return_values class Container: diff --git a/theano/link/c/__init__.py b/aesara/link/c/__init__.py similarity index 100% rename from theano/link/c/__init__.py rename to aesara/link/c/__init__.py diff --git a/theano/link/c/basic.py b/aesara/link/c/basic.py similarity index 98% rename from theano/link/c/basic.py rename to aesara/link/c/basic.py index 158e48b693..e6af09d31d 100644 --- a/theano/link/c/basic.py +++ b/aesara/link/c/basic.py @@ -11,25 +11,25 @@ import numpy as np -from theano.compile.compilelock import lock_ctx -from theano.configdefaults import config -from theano.graph.basic import Constant, NoParams, io_toposort, vars_between -from theano.graph.callcache import CallCache -from theano.link.basic import Container, Linker, LocalLinker, PerformLinker -from theano.link.c.cmodule import ( +from aesara.compile.compilelock import lock_ctx +from aesara.configdefaults import config +from aesara.graph.basic import Constant, NoParams, io_toposort, vars_between +from aesara.graph.callcache import CallCache +from aesara.link.basic import Container, Linker, LocalLinker, PerformLinker +from aesara.link.c.cmodule import ( METH_VARARGS, DynamicModule, ExtFunction, 
GCC_compiler, dlimport_workdir, ) -from theano.link.c.cmodule import get_module_cache as _get_module_cache -from theano.link.c.interface import CLinkerObject, CLinkerOp, CLinkerType -from theano.link.utils import gc_helper, map_storage, raise_with_op, streamline -from theano.utils import difference, uniq +from aesara.link.c.cmodule import get_module_cache as _get_module_cache +from aesara.link.c.interface import CLinkerObject, CLinkerOp, CLinkerType +from aesara.link.utils import gc_helper, map_storage, raise_with_op, streamline +from aesara.utils import difference, uniq -_logger = logging.getLogger("theano.link.c.basic") +_logger = logging.getLogger("aesara.link.c.basic") run_cthunk = None # Will be imported only when needed. @@ -310,7 +310,7 @@ def struct_gen(args, struct_builders, blocks, sub): // now I am tired of chasing segfaults because // initialization code had an error and some pointer has // a junk value. - #ifndef THEANO_DONT_MEMSET_STRUCT + #ifndef AESARA_DONT_MEMSET_STRUCT memset(this, 0, sizeof(*this)); #endif } @@ -655,9 +655,9 @@ def fetch_variables(self): for r in self.variables if isinstance(r, Constant) and r not in self.inputs ) - # C type constants (theano.scalar.Scalar). They don't request an object + # C type constants (aesara.scalar.Scalar). They don't request an object self.consts = [] - # Move c type from orphans (theano.scalar.Scalar) to self.consts + # Move c type from orphans (aesara.scalar.Scalar) to self.consts for variable in self.orphans: if ( isinstance(variable, Constant) @@ -1232,7 +1232,7 @@ def cmodule_key(self): The outer tuple has a brief header, containing the compilation options passed to the compiler, the libraries to link against, a sha256 hash - of theano.config (for all config options where "in_c_key" is True). + of aesara.config (for all config options where "in_c_key" is True). It is followed by elements for every node in the topological ordering of `self.fgraph`. @@ -1418,14 +1418,14 @@ def in_sig(i, topological_pos, i_idx): if isinstance(i, Constant): # orphans if id(i) not in constant_ids: isig = (i.signature(), topological_pos, i_idx) - # If the Theano constant provides a strong hash + # If the Aesara constant provides a strong hash # (no collision for transpose, 2, 1, 0, -1, -2, # 2 element swapped...) we put this hash in the signature # instead of the value. This makes the key file much # smaller for big constant arrays. Before this, we saw key # files up to 80M. - if hasattr(isig[0], "theano_hash"): - isig = (isig[0].theano_hash(), topological_pos, i_idx) + if hasattr(isig[0], "aesara_hash"): + isig = (isig[0].aesara_hash(), topological_pos, i_idx) try: hash(isig) except Exception: @@ -1721,8 +1721,8 @@ class _CThunk: def __init__(self, cthunk, init_tasks, tasks, error_storage, module): global run_cthunk if run_cthunk is None: - # Lazy import to avoid compilation when importing theano. - from theano.link.c.cutils import run_cthunk # noqa + # Lazy import to avoid compilation when importing aesara. + from aesara.link.c.cutils import run_cthunk # noqa self.cthunk = cthunk self.init_tasks = init_tasks self.tasks = tasks @@ -1788,7 +1788,7 @@ class OpWiseCLinker(LocalLinker): Notes ----- - This is in a sense the 'default' linker for Theano. The + This is in a sense the 'default' linker for Aesara. The overhead of using the OpWiseCLinker as compared with the CLinker is only noticeable for graphs of very small tensors (such as 20 elements or less). 
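The note above contrasts `OpWiseCLinker` (the default, one C thunk per op) with `CLinker` (one C program for the whole graph). As a hedged illustration only, both can be selected through a `Mode`; the string names ``"c|py"`` and ``"c"`` are the ones Theano historically registered for these linkers and are assumed to carry over unchanged here:

.. code-block:: python

    # Hedged sketch: choosing between the per-op and whole-graph C linkers.
    # The linker names "c|py" (OpWiseCLinker) and "c" (CLinker) are assumed
    # to be unchanged from Theano.
    import aesara
    from aesara.compile.mode import Mode
    from aesara.tensor.type import vector

    x = vector("x")
    y = 2 * x + x * x

    f_opwise = aesara.function([x], y, mode=Mode(linker="c|py"))  # OpWiseCLinker
    f_whole = aesara.function([x], y, mode=Mode(linker="c"))      # CLinker
    print(f_opwise([1.0, 2.0]), f_whole([1.0, 2.0]))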
diff --git a/theano/link/c/c_code/theano_mod_helper.h b/aesara/link/c/c_code/aesara_mod_helper.h similarity index 67% rename from theano/link/c/c_code/theano_mod_helper.h rename to aesara/link/c/c_code/aesara_mod_helper.h index e1327ee849..7489694256 100644 --- a/theano/link/c/c_code/theano_mod_helper.h +++ b/aesara/link/c/c_code/aesara_mod_helper.h @@ -1,5 +1,5 @@ -#ifndef THEANO_MOD_HELPER -#define THEANO_MOD_HELPER +#ifndef AESARA_MOD_HELPER +#define AESARA_MOD_HELPER #include @@ -13,19 +13,19 @@ #endif #ifdef __cplusplus -#define THEANO_EXTERN extern "C" +#define AESARA_EXTERN extern "C" #else -#define THEANO_EXTERN +#define AESARA_EXTERN #endif #if PY_MAJOR_VERSION < 3 -#define THEANO_RTYPE void +#define AESARA_RTYPE void #else -#define THEANO_RTYPE PyObject * +#define AESARA_RTYPE PyObject * #endif /* We need to redefine PyMODINIT_FUNC to add MOD_PUBLIC in the middle */ #undef PyMODINIT_FUNC -#define PyMODINIT_FUNC THEANO_EXTERN MOD_PUBLIC THEANO_RTYPE +#define PyMODINIT_FUNC AESARA_EXTERN MOD_PUBLIC AESARA_RTYPE #endif diff --git a/theano/link/c/c_code/lazylinker_c.c b/aesara/link/c/c_code/lazylinker_c.c similarity index 99% rename from theano/link/c/c_code/lazylinker_c.c rename to aesara/link/c/c_code/lazylinker_c.c index e9441a9e2a..ec725fbb2d 100644 --- a/theano/link/c/c_code/lazylinker_c.c +++ b/aesara/link/c/c_code/lazylinker_c.c @@ -1,5 +1,5 @@ #include -#include "theano_mod_helper.h" +#include "aesara_mod_helper.h" #include "structmember.h" #include @@ -16,7 +16,7 @@ /** -TODO: +TODO: - Check max supported depth of recursion - CLazyLinker should add context information to errors caught during evaluation. Say what node we were on, add the traceback attached to the node. - Clear containers of fully-useed intermediate results if allow_gc is 1 @@ -1083,7 +1083,7 @@ PyInit_lazylinker_ext(void) { #else #define RETVAL PyMODINIT_FUNC -initlazylinker_ext(void) +initlazylinker_ext(void) { #endif PyObject* m; diff --git a/theano/link/c/cmodule.py b/aesara/link/c/cmodule.py similarity index 98% rename from theano/link/c/cmodule.py rename to aesara/link/c/cmodule.py index ba1174ba11..5611d80b6e 100644 --- a/theano/link/c/cmodule.py +++ b/aesara/link/c/cmodule.py @@ -22,13 +22,13 @@ import numpy.distutils -import theano +import aesara # we will abuse the lockfile mechanism when reading and writing the registry -from theano.compile.compilelock import lock_ctx -from theano.configdefaults import config, gcc_version_str -from theano.link.c.exceptions import MissingGXX -from theano.utils import ( +from aesara.compile.compilelock import lock_ctx +from aesara.configdefaults import config, gcc_version_str +from aesara.link.c.exceptions import MissingGXX +from aesara.utils import ( LOCAL_BITWIDTH, flatten, hash_from_code, @@ -37,7 +37,7 @@ ) -_logger = logging.getLogger("theano.link.c.cmodule") +_logger = logging.getLogger("aesara.link.c.cmodule") METH_VARARGS = "METH_VARARGS" METH_NOARGS = "METH_NOARGS" @@ -127,7 +127,7 @@ def __init__(self, name=None): self.support_code = [] self.functions = [] - self.includes = ["", "", '"theano_mod_helper.h"'] + self.includes = ["", "", '"aesara_mod_helper.h"'] self.init_blocks = [] def print_methoddef(self, stream): @@ -413,9 +413,9 @@ def get_module_hash(src_code, key): elif isinstance(key_element, str): if key_element.startswith("md5:") or key_element.startswith("hash:"): # This is actually a sha256 hash of the config options. - # Currently, we still keep md5 to don't break old Theano. + # Currently, we still keep md5 to don't break old Aesara. 
# We add 'hash:' so that when we change it in - # the futur, it won't break this version of Theano. + # the futur, it won't break this version of Aesara. break elif key_element.startswith("NPY_ABI_VERSION=0x") or key_element.startswith( "c_compiler_str=" @@ -447,8 +447,8 @@ def get_safe_part(key): # Find the hash part. This is actually a sha256 hash of the config # options. Currently, we still keep md5 to don't break old - # Theano. We add 'hash:' so that when we change it - # in the futur, it won't break this version of Theano. + # Aesara. We add 'hash:' so that when we change it + # in the futur, it won't break this version of Aesara. c_link_key = key[1] # In case in the future, we don't have an md5 part and we have # such stuff in the cache. In that case, we can set None, and the @@ -831,7 +831,7 @@ def unpickle_failure(): # This exception is often triggered by keys # that contain references to classes that have # not yet been imported (e.g. when running two - # different Theano-based scripts). They are not + # different Aesara-based scripts). They are not # necessarily broken, but we cannot load them # now. They will be loaded later if needed. pass @@ -1120,7 +1120,7 @@ def _add_to_cache(self, module, key, module_hash): self.loaded_key_pkl.add(key_pkl) elif config.cmodule__warn_no_version: key_flat = flatten(key) - ops = [k for k in key_flat if isinstance(k, theano.graph.op.Op)] + ops = [k for k in key_flat if isinstance(k, aesara.graph.op.Op)] _logger.warning( "not all the" " following op(s) implement" @@ -1161,10 +1161,10 @@ def module_from_key(self, key, lnk=None): with lock_ctx(): # 1) Maybe somebody else compiled it for us while we # where waiting for the lock. Try to load it again. - # 2) If other repo that import Theano have Theano ops defined, + # 2) If other repo that import Aesara have Aesara ops defined, # we need to refresh the cache here. Otherwise, there are import # order problems. - # When device=gpu, we compile during Theano + # When device=gpu, we compile during Aesara # import. This triggers the loading of the cache. But # unpickling the cache asks that the external Ops are # completly loaded, which isn't always the case! @@ -1987,7 +1987,7 @@ def try_march_flag(flags): """ ) - cflags = flags + ["-L" + d for d in theano.link.c.cmodule.std_lib_dirs()] + cflags = flags + ["-L" + d for d in aesara.link.c.cmodule.std_lib_dirs()] compilation_result, execution_result = GCC_compiler.try_compile_tmp( test_code, tmp_prefix="try_march_", flags=cflags, try_run=True ) @@ -2009,8 +2009,8 @@ def compile_args(march_flags=True): cxxflags = [flag for flag in config.gcc__cxxflags.split(" ") if flag] if "-fopenmp" in cxxflags: raise ValueError( - "Do not use -fopenmp in Theano flag gcc__cxxflags." - " To enable OpenMP, use the Theano flag openmp=True" + "Do not use -fopenmp in Aesara flag gcc__cxxflags." + " To enable OpenMP, use the Aesara flag openmp=True" ) # Add the equivalent of -march=native flag. We can't use # -march=native as when the compiledir is shared by multiple @@ -2036,12 +2036,12 @@ def compile_args(march_flags=True): and "icpc" not in config.cxx ): _logger.warning( - "Your Theano flag `cxx` seems not to be" + "Your Aesara flag `cxx` seems not to be" " the g++ compiler. So we disable the compiler optimization" " specific to g++ that tell to compile for a specific CPU." " At worst, this could cause slow down.\n" " You can add those parameters to the compiler yourself" - " via the Theano flag `gcc__cxxflags`." + " via the Aesara flag `gcc__cxxflags`." 
) detect_march = False @@ -2105,11 +2105,11 @@ def get_lines(cmd, parse=True): else: reported_lines = native_lines _logger.warning( - "Theano was not able to find the" + "Aesara was not able to find the" " g++ parameters that tune the compilation to your " - " specific CPU. This can slow down the execution of Theano" + " specific CPU. This can slow down the execution of Aesara" " functions. Please submit the following lines to" - " Theano's mailing list so that we can fix this" + " Aesara's mailing list so that we can fix this" f" problem:\n {reported_lines}" ) else: @@ -2117,12 +2117,12 @@ def get_lines(cmd, parse=True): _logger.info(f"g++ default lines: {default_lines}") if len(default_lines) < 1: _logger.warning( - "Theano was not able to find the" + "Aesara was not able to find the" " default g++ parameters. This is needed to tune" " the compilation to your specific" - " CPU. This can slow down the execution of Theano" + " CPU. This can slow down the execution of Aesara" " functions. Please submit the following lines to" - " Theano's mailing list so that we can fix this" + " Aesara's mailing list so that we can fix this" " problem:\n %s", get_lines(f"{config.cxx} -E -v -", parse=False), ) @@ -2298,7 +2298,7 @@ def join_options(init_part): # Figure out whether the current Python executable is 32 # or 64 bit and compile accordingly. This step is ignored for # ARM (32-bit and 64-bit) architectures in order to make - # Theano compatible with the Raspberry Pi, Raspberry Pi 2, or + # Aesara compatible with the Raspberry Pi, Raspberry Pi 2, or # other systems with ARM processors. if not any(["arm" in flag for flag in cxxflags]) and not any( arch in platform.machine() for arch in ["arm", "aarch"] @@ -2504,7 +2504,7 @@ def print_command_line_error(): if status: tf = tempfile.NamedTemporaryFile( - mode="w", prefix="theano_compilation_error_", delete=False + mode="w", prefix="aesara_compilation_error_", delete=False ) # gcc put its messages to stderr, so we add ours now tf.write("===============================\n") @@ -2583,7 +2583,7 @@ def check_mkl_openmp(): if "2018" in mkl.get_version_string(): raise RuntimeError( """ -To use MKL 2018 with Theano either update the numpy conda packages to +To use MKL 2018 with Aesara either update the numpy conda packages to their latest build or set "MKL_THREADING_LAYER=GNU" in your environment. """ @@ -2596,7 +2596,7 @@ def check_mkl_openmp(): your environment for MKL 2018. If you have MKL 2017 install and are not in a conda environment you -can set the Theano flag blas__check_openmp to False. Be warned that if +can set the Aesara flag blas__check_openmp to False. Be warned that if you set this flag and don't set the appropriate environment or make sure you have the right version you *will* get wrong results. """ @@ -2621,7 +2621,7 @@ def default_blas_ldflags(): blas_info = numpy.distutils.__config__.blas_opt_info else: # We do this import only here, as in some setup, if we - # just import theano and exit, with the import at global + # just import aesara and exit, with the import at global # scope, we get this error at exit: "Exception TypeError: # "'NoneType' object is not callable" in > @@ -2669,7 +2669,7 @@ def default_blas_ldflags(): "The environment variable " "'DYLD_FALLBACK_LIBRARY_PATH' does not contain " "the '{new_path}' path in its value. This will make " - "Theano use a slow version of BLAS. Update " + "Aesara use a slow version of BLAS. Update " "'DYLD_FALLBACK_LIBRARY_PATH' to contain the " "said value, this will disable this warning." 
) @@ -2767,7 +2767,7 @@ def default_blas_ldflags(): res = try_blas_flag(flags) if res: check_mkl_openmp() - theano.utils.maybe_add_to_os_environ_pathlist("PATH", lib_path[0]) + aesara.utils.maybe_add_to_os_environ_pathlist("PATH", lib_path[0]) return res # to support path that includes spaces, we need to wrap it with double quotes on Windows @@ -2838,7 +2838,7 @@ def add_blas_configvars(): config.add( "blas__ldflags", "lib[s] to include for [Fortran] level-3 blas implementation", - theano.configparser.StrParam(default_blas_ldflags), + aesara.configparser.StrParam(default_blas_ldflags), # Added elsewhere in the c key only when needed. in_c_key=False, ) @@ -2846,7 +2846,7 @@ def add_blas_configvars(): config.add( "blas__check_openmp", "Check for openmp library conflict.\nWARNING: Setting this to False leaves you open to wrong results in blas-related operations.", - theano.configparser.BoolParam(True), + aesara.configparser.BoolParam(True), in_c_key=False, ) diff --git a/theano/link/c/cutils.py b/aesara/link/c/cutils.py similarity index 95% rename from theano/link/c/cutils.py rename to aesara/link/c/cutils.py index bbcd50fb62..311a8eb33c 100644 --- a/theano/link/c/cutils.py +++ b/aesara/link/c/cutils.py @@ -2,9 +2,9 @@ import os import sys -from theano.compile.compilelock import lock_ctx -from theano.configdefaults import config -from theano.link.c import cmodule +from aesara.compile.compilelock import lock_ctx +from aesara.configdefaults import config +from aesara.link.c import cmodule # TODO These two lines may be removed in the future, when we are 100% sure @@ -20,7 +20,7 @@ def compile_cutils(): """ code = """ #include - #include "theano_mod_helper.h" + #include "aesara_mod_helper.h" extern "C"{ static PyObject * @@ -44,7 +44,7 @@ def compile_cutils(): } static PyMethodDef CutilsExtMethods[] = { {"run_cthunk", run_cthunk, METH_VARARGS|METH_KEYWORDS, - "Run a theano cthunk."}, + "Run an aesara cthunk."}, {NULL, NULL, 0, NULL} /* Sentinel */ };""" diff --git a/theano/link/c/cvm.py b/aesara/link/c/cvm.py similarity index 78% rename from theano/link/c/cvm.py rename to aesara/link/c/cvm.py index f433328d6d..2432ea90f8 100644 --- a/theano/link/c/cvm.py +++ b/aesara/link/c/cvm.py @@ -1,6 +1,6 @@ -from theano.configdefaults import config -from theano.link.c.exceptions import MissingGXX -from theano.link.vm import VM +from aesara.configdefaults import config +from aesara.link.c.exceptions import MissingGXX +from aesara.link.vm import VM try: @@ -8,9 +8,9 @@ # either lazy-linker C code or lazy-linker compiled C code from the cache. if not config.cxx: raise MissingGXX( - "lazylinker will not be imported if theano.config.cxx is not set." + "lazylinker will not be imported if aesara.config.cxx is not set." 
) - from theano.link.c import lazylinker_c + from aesara.link.c import lazylinker_c class CVM(lazylinker_c.CLazyLinker, VM): def __init__(self, fgraph, *args, **kwargs): diff --git a/theano/link/c/exceptions.py b/aesara/link/c/exceptions.py similarity index 100% rename from theano/link/c/exceptions.py rename to aesara/link/c/exceptions.py diff --git a/theano/link/c/interface.py b/aesara/link/c/interface.py similarity index 99% rename from theano/link/c/interface.py rename to aesara/link/c/interface.py index 1c4d549dce..988c725642 100644 --- a/theano/link/c/interface.py +++ b/aesara/link/c/interface.py @@ -1,8 +1,8 @@ from abc import abstractmethod from typing import Dict, List, Text, Tuple -from theano.graph.basic import Apply, Constant -from theano.graph.utils import MethodNotDefined +from aesara.graph.basic import Apply, Constant +from aesara.graph.utils import MethodNotDefined class CLinkerObject: diff --git a/theano/link/c/lazylinker_c.py b/aesara/link/c/lazylinker_c.py similarity index 93% rename from theano/link/c/lazylinker_c.py rename to aesara/link/c/lazylinker_c.py index 0658ae6e68..ce1ecd616c 100644 --- a/theano/link/c/lazylinker_c.py +++ b/aesara/link/c/lazylinker_c.py @@ -5,10 +5,10 @@ import warnings from importlib import reload -import theano -from theano.compile.compilelock import lock_ctx -from theano.configdefaults import config -from theano.link.c.cmodule import GCC_compiler +import aesara +from aesara.compile.compilelock import lock_ctx +from aesara.configdefaults import config +from aesara.link.c.cmodule import GCC_compiler _logger = logging.getLogger(__file__) @@ -108,7 +108,7 @@ def try_reload(): _logger.info("Compiling new CVM") dirname = "lazylinker_ext" cfile = os.path.join( - theano.__path__[0], "link", "c", "c_code", "lazylinker_c.c" + aesara.__path__[0], "link", "c", "c_code", "lazylinker_c.c" ) if not os.path.exists(cfile): # This can happen in not normal case. We just @@ -117,9 +117,9 @@ def try_reload(): warnings.warn( "The file lazylinker_c.c is not available. This do" "not happen normally. You are probably in a strange" - "setup. This mean Theano can not use the cvm:" - "our c execution engine for Theano function. If you" - "want to remove this warning, use the Theano flag" + "setup. This mean Aesara can not use the cvm:" + "our c execution engine for Aesara function. If you" + "want to remove this warning, use the Aesara flag" "'cxx=' (set to an empty string) to disable all c" "code generation." 
) diff --git a/aesara/link/jax/__init__.py b/aesara/link/jax/__init__.py new file mode 100644 index 0000000000..3dbbdf5717 --- /dev/null +++ b/aesara/link/jax/__init__.py @@ -0,0 +1 @@ +from aesara.link.jax.jax_linker import JAXLinker diff --git a/theano/link/jax/jax_dispatch.py b/aesara/link/jax/jax_dispatch.py similarity index 95% rename from theano/link/jax/jax_dispatch.py rename to aesara/link/jax/jax_dispatch.py index e6e4baa5a6..b8e25a79f9 100644 --- a/theano/link/jax/jax_dispatch.py +++ b/aesara/link/jax/jax_dispatch.py @@ -6,15 +6,15 @@ import jax.numpy as jnp import jax.scipy as jsp -from theano.compile.ops import DeepCopyOp, ViewOp -from theano.configdefaults import config -from theano.graph.fg import FunctionGraph -from theano.graph.type import CType -from theano.ifelse import IfElse -from theano.scalar.basic import Cast, Clip, Composite, Identity, ScalarOp, Second -from theano.scan.op import Scan -from theano.scan.utils import scan_args as ScanArgs -from theano.tensor.basic import ( +from aesara.compile.ops import DeepCopyOp, ViewOp +from aesara.configdefaults import config +from aesara.graph.fg import FunctionGraph +from aesara.graph.type import CType +from aesara.ifelse import IfElse +from aesara.scalar.basic import Cast, Clip, Composite, Identity, ScalarOp, Second +from aesara.scan.op import Scan +from aesara.scan.utils import scan_args as ScanArgs +from aesara.tensor.basic import ( Alloc, AllocEmpty, ARange, @@ -25,9 +25,9 @@ ScalarFromTensor, TensorFromScalar, ) -from theano.tensor.blas import BatchedDot -from theano.tensor.elemwise import CAReduce, DimShuffle, Elemwise -from theano.tensor.extra_ops import ( +from aesara.tensor.blas import BatchedDot +from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise +from aesara.tensor.extra_ops import ( Bartlett, CumOp, DiffOp, @@ -38,8 +38,8 @@ Unique, UnravelIndex, ) -from theano.tensor.math import Dot, MaxAndArgmax -from theano.tensor.nlinalg import ( +from aesara.tensor.math import Dot, MaxAndArgmax +from aesara.tensor.nlinalg import ( SVD, AllocDiag, Det, @@ -50,11 +50,11 @@ QRFull, QRIncomplete, ) -from theano.tensor.nnet.basic import Softmax -from theano.tensor.nnet.sigm import ScalarSoftplus -from theano.tensor.shape import Reshape, Shape, Shape_i, SpecifyShape -from theano.tensor.slinalg import Cholesky, Solve -from theano.tensor.subtensor import ( # This is essentially `np.take`; Boolean mask indexing and setting +from aesara.tensor.nnet.basic import Softmax +from aesara.tensor.nnet.sigm import ScalarSoftplus +from aesara.tensor.shape import Reshape, Shape, Shape_i, SpecifyShape +from aesara.tensor.slinalg import Cholesky, Solve +from aesara.tensor.subtensor import ( # This is essentially `np.take`; Boolean mask indexing and setting AdvancedIncSubtensor, AdvancedIncSubtensor1, AdvancedSubtensor, @@ -63,7 +63,7 @@ Subtensor, get_idx_list, ) -from theano.tensor.type_other import MakeSlice +from aesara.tensor.type_other import MakeSlice if config.floatX == "float64": @@ -92,7 +92,7 @@ def compose_jax_funcs(out_node, fgraph_inputs, memo=None): Parameters ---------- - out_node: theano.graph.basic.Apply + out_node: aesara.graph.basic.Apply The node for which we want to construct a JAX JIT-able function. fgraph_inputs: List[Variable] The inputs--in a `FunctionGraph` sense--to `out_node`. 
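``aesara/link/jax/__init__.py`` above exposes `JAXLinker`, and ``jax_dispatch`` supplies the per-`Op` conversions it relies on. A minimal, hedged sketch of pointing `aesara.function` at that linker; whether a particular graph actually compiles this way depends on which `Op`s have a JAX conversion registered and on `jax`/`jaxlib` being installed:

.. code-block:: python

    # Hedged sketch only; not taken from the diff. Simple elemwise graphs are
    # the safest bet, since their scalar Ops have JAX conversions.
    import aesara
    from aesara.compile.mode import Mode
    from aesara.link.jax import JAXLinker
    from aesara.tensor.type import vector

    x = vector("x")
    y = 2 * x + x * x

    f_jax = aesara.function([x], y, mode=Mode(linker=JAXLinker()))
    print(f_jax([1.0, 2.0, 3.0]))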
@@ -130,7 +130,7 @@ def jax_inputs_func(*inputs, i_dtype=i_dtype, idx=idx): input_f = jax_inputs_func elif i.owner is None: - # This input is something like a `theano.graph.basic.Constant` + # This input is something like a `aesara.graph.basic.Constant` i_dtype = getattr(i, "dtype", None) i_data = i.data @@ -186,7 +186,7 @@ def jax_func(*inputs): @singledispatch def jax_funcify(op): - """Create a JAX "perform" function for a Theano `Variable` and its `Op`.""" + """Create a JAX "perform" function for an Aesara `Variable` and its `Op`.""" raise NotImplementedError(f"No JAX conversion for the given `Op`: {op}") @@ -209,7 +209,7 @@ def jax_funcify_ScalarOp(op): if hasattr(op, "nfunc_variadic"): # These are special cases that handle invalid arities due to the broken - # Theano `Op` type contract (e.g. binary `Op`s that also function as + # Aesara `Op` type contract (e.g. binary `Op`s that also function as # their own variadic counterparts--even when those counterparts already # exist as independent `Op`s). jax_variadic_func = getattr(jnp, op.nfunc_variadic) @@ -693,7 +693,7 @@ def careduce(x): jax_op = getattr(jnp, op_nfunc_spec[0]) return jax_op(x, axis=axis).astype(acc_dtype) - # The Theano `Op` didn't tell us which NumPy equivalent to use (or + # The Aesara `Op` didn't tell us which NumPy equivalent to use (or # there isn't one), so we use this fallback approach if scalar_nfunc_spec: scalar_fn_name = scalar_nfunc_spec[0] diff --git a/theano/link/jax/jax_linker.py b/aesara/link/jax/jax_linker.py similarity index 96% rename from theano/link/jax/jax_linker.py rename to aesara/link/jax/jax_linker.py index 1d36638819..2d48fd330c 100644 --- a/theano/link/jax/jax_linker.py +++ b/aesara/link/jax/jax_linker.py @@ -1,10 +1,10 @@ from collections.abc import Sequence from warnings import warn -from theano.graph.basic import Constant -from theano.link.basic import Container, PerformLinker -from theano.link.utils import gc_helper, map_storage, streamline -from theano.utils import difference +from aesara.graph.basic import Constant +from aesara.link.basic import Container, PerformLinker +from aesara.link.utils import gc_helper, map_storage, streamline +from aesara.utils import difference class JAXLinker(PerformLinker): @@ -44,7 +44,7 @@ def create_jax_thunks(self, compute_map, storage_map): """ import jax - from theano.link.jax.jax_dispatch import jax_funcify + from aesara.link.jax.jax_dispatch import jax_funcify output_nodes = [o.owner for o in self.fgraph.outputs] diff --git a/theano/link/utils.py b/aesara/link/utils.py similarity index 95% rename from theano/link/utils.py rename to aesara/link/utils.py index 9fb2bf30a4..64b9f90bc7 100644 --- a/theano/link/utils.py +++ b/aesara/link/utils.py @@ -7,10 +7,10 @@ import numpy as np -from theano import utils -from theano.configdefaults import config -from theano.graph.basic import Apply, Constant -from theano.graph.fg import FunctionGraph +from aesara import utils +from aesara.configdefaults import config +from aesara.graph.basic import Apply, Constant +from aesara.graph.fg import FunctionGraph def map_storage( @@ -230,7 +230,7 @@ def gc_helper(node_list: typing.List[Apply]): This is used to allow garbage collection within graphs. It ignores view_map and destroy_map. This isn't needed as python - have reference count. In Theano gc, we should not take into + have reference count. In Aesara gc, we should not take into account view_map and destroy_map as if the thunk decided to create a new output, we would delay uselessly its gc by Python. 
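Since `jax_funcify` above is a `singledispatch` function keyed on the `Op` class, a third-party `Op` can opt in to the JAX backend by registering a converter for its class. A hedged sketch, using the `DoubleOp` example op that appears later in this diff (``aesara/misc/doubleop.py``) purely as a stand-in:

.. code-block:: python

    # Hedged sketch of registering a JAX conversion for an existing Op class.
    import jax.numpy as jnp

    from aesara.link.jax.jax_dispatch import jax_funcify
    from aesara.misc.doubleop import DoubleOp  # example Op from this diff


    @jax_funcify.register(DoubleOp)
    def jax_funcify_DoubleOp(op):
        # Return the JAX "perform" function for this Op; the linker applies it
        # to the node's input values.
        def double(x):
            return jnp.asarray(x) * 2

        return double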
@@ -262,7 +262,7 @@ def raise_with_op( associated traceback, as would be returned by a call to `sys.exc_info()` (which is done if `None` is passed). storage_map: dict, optional - storage map of the theano function that resulted in the + storage map of the aesara function that resulted in the raised exception. Notes @@ -270,7 +270,7 @@ def raise_with_op( This re-raises the exception described by `exc_info` (or the last one raised, if `exc_info` is omitted) and annotates the exception object with several new members which may be helpful for debugging - Theano graphs. They are: + Aesara graphs. They are: * __op_instance__: The Op that is responsible for the exception being raised. @@ -352,7 +352,7 @@ def raise_with_op( # Print node backtraces tr = getattr(node.outputs[0].tag, "trace", []) if isinstance(tr, list) and len(tr) > 0: - detailed_err_msg += "\nBacktrace when the node is created(use Theano flag traceback__limit=N to make it longer):\n" + detailed_err_msg += "\nBacktrace when the node is created(use Aesara flag traceback__limit=N to make it longer):\n" # Print separate message for each element in the list of batcktraces sio = io.StringIO() @@ -361,19 +361,19 @@ def raise_with_op( detailed_err_msg += str(sio.getvalue()) else: hints.append( - "HINT: Re-running with most Theano optimization disabled could" + "HINT: Re-running with most Aesara optimization disabled could" " give you a back-trace of when this node was created. This can" - " be done with by setting the Theano flag" + " be done with by setting the Aesara flag" " 'optimizer=fast_compile'. If that does not work," - " Theano optimizations can be disabled with 'optimizer=None'." + " Aesara optimizations can be disabled with 'optimizer=None'." ) if verbosity == "high": - import theano.printing + import aesara.printing f = io.StringIO() - theano.printing.debugprint(node, file=f, stop_on_name=True, print_type=True) + aesara.printing.debugprint(node, file=f, stop_on_name=True, print_type=True) detailed_err_msg += "\nDebugprint of the apply node: \n" detailed_err_msg += f.getvalue() @@ -383,12 +383,12 @@ def raise_with_op( shared_input_list = [ item for item in fgraph.inputs - if isinstance(item, theano.compile.SharedVariable) + if isinstance(item, aesara.compile.SharedVariable) ] nonshared_input_list = [ item for item in fgraph.inputs - if not isinstance(item, theano.compile.SharedVariable) + if not isinstance(item, aesara.compile.SharedVariable) ] storage_map_list = [] total_size = 0 @@ -493,7 +493,7 @@ def raise_with_op( else: hints.append( - "HINT: Use the Theano flag 'exception_verbosity=high'" + "HINT: Use the Aesara flag 'exception_verbosity=high'" " for a debugprint and storage map footprint of this apply node." ) @@ -510,7 +510,7 @@ def raise_with_op( def __log_thunk_trace(value, handler: io.TextIOWrapper): """ - Log Theano's diagnostic stack trace for an exception. + Log Aesara's diagnostic stack trace for an exception. Uses custom attributes that are added to trace objects by raise_with_op. """ @@ -537,12 +537,12 @@ def write(msg): write(line) write( "For the full definition stack trace set" - " the Theano flags traceback__limit to -1" + " the Aesara flags traceback__limit to -1" ) def register_thunk_trace_excepthook(handler: io.TextIOWrapper = sys.stdout): - """Adds the __log_thunk_trace except hook to the collection in theano.utils. + """Adds the __log_thunk_trace except hook to the collection in aesara.utils. 
Parameters ---------- diff --git a/theano/link/vm.py b/aesara/link/vm.py similarity index 98% rename from theano/link/vm.py rename to aesara/link/vm.py index a437296bf3..5c188150ef 100644 --- a/theano/link/vm.py +++ b/aesara/link/vm.py @@ -1,5 +1,5 @@ """ -VMs that run Theano graph computations. +VMs that run Aesara graph computations. A VM is not actually different from a Linker, we just decided VM was a better name at some point. @@ -12,11 +12,11 @@ import warnings from collections import defaultdict -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable -from theano.link.basic import Container, LocalLinker -from theano.link.c.exceptions import MissingGXX -from theano.link.utils import gc_helper, map_storage, raise_with_op +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable +from aesara.link.basic import Container, LocalLinker +from aesara.link.c.exceptions import MissingGXX +from aesara.link.utils import gc_helper, map_storage, raise_with_op logger = logging.getLogger(__name__) @@ -123,7 +123,7 @@ def calculate_reallocate_info(order, fgraph, storage_map, compute_map_re, depend class VM: """ - A VM object's __call__ method evaluates a Theano program. + A VM object's __call__ method evaluates an Aesara program. The Stack should be considered the reference VM/Linker implementation. It can correctly evaluate all graphs and is the easiest to read. The CVM @@ -243,7 +243,7 @@ class Loop(VM): """ - # Some other part of Theano query that information + # Some other part of Aesara query that information allow_gc = False def __call__(self): @@ -279,7 +279,7 @@ class LoopGC(VM): def __init__(self, fgraph, nodes, thunks, pre_call_clear, post_thunk_clear): super().__init__(fgraph, nodes, thunks, pre_call_clear) self.post_thunk_clear = post_thunk_clear - # Some other part of Theano query that information + # Some other part of Aesara query that information self.allow_gc = True if not (len(nodes) == len(thunks) == len(post_thunk_clear)): raise ValueError() @@ -580,7 +580,7 @@ def __call__(self, output_subset=None): ): warnings.warn( "There was a bug that existed in " - "the default Theano configuration," + "the default Aesara configuration," " only in the development version " "between July 5th 2012 and " "July 30th 2012. This was not in " @@ -703,7 +703,7 @@ class VMLinker(LocalLinker): Force the virtual machine to clean up unnecessary references, in order to allow garbage collection on intermediate values during computation of a function. - If None use as default the value of the Theano flag allow_gc. + If None use as default the value of the Aesara flag allow_gc. use_cloop Use the C-based virtual machine if possible callback @@ -717,7 +717,7 @@ class VMLinker(LocalLinker): 'var', 'value'. lazy Useful only when use_cloop is False. When lazy is None, use the - theano flag vm__lazy value. Then if we have a None (default) we auto + aesara flag vm__lazy value. Then if we have a None (default) we auto detect if lazy evaluation is needed and use the appropriate version. If lazy is True or False, we force the version used between Loop/LoopGC and Stack. 
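The parameters documented above can also be exercised directly by handing a `VMLinker` instance to a `Mode`. A hedged sketch: ``use_cloop=False`` keeps execution in the Python VMs so the per-node ``callback`` can fire; the callback parameter names mirror what the Stack VM passes for each executed node.

.. code-block:: python

    import aesara
    from aesara.compile.mode import Mode
    from aesara.link.vm import VMLinker
    from aesara.tensor.type import vector

    def trace(node, thunk, storage_map, compute_map):
        # Invoked after each Apply node executes in the Python (Stack) VM.
        print("ran", node.op)

    x = vector("x")
    y = 2 * x + 1

    linker = VMLinker(use_cloop=False, callback=trace)
    f = aesara.function([x], y, mode=Mode(linker=linker))
    f([1.0, 2.0])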
@@ -890,7 +890,7 @@ def make_vm( pre_call_clear = [storage_map[v] for v in self.no_recycling] try: - from theano.link.c.cvm import CVM + from aesara.link.c.cvm import CVM except (MissingGXX, ImportError): CVM = None diff --git a/theano/misc/__init__.py b/aesara/misc/__init__.py similarity index 100% rename from theano/misc/__init__.py rename to aesara/misc/__init__.py diff --git a/theano/misc/burn_gpu.py b/aesara/misc/burn_gpu.py similarity index 63% rename from theano/misc/burn_gpu.py rename to aesara/misc/burn_gpu.py index ac71c67631..50dba18aa2 100644 --- a/theano/misc/burn_gpu.py +++ b/aesara/misc/burn_gpu.py @@ -6,11 +6,11 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.gpuarray import dnn -from theano.tensor.nnet.abstract_conv import get_conv_output_shape -from theano.tensor.type import tensor4 +import aesara +from aesara.configdefaults import config +from aesara.gpuarray import dnn +from aesara.tensor.nnet.abstract_conv import get_conv_output_shape +from aesara.tensor.type import tensor4 def burn(): @@ -25,15 +25,15 @@ def burn(): def rand(shp): return np.random.rand(*shp).astype(config.floatX) - img = theano.shared(rand(img_shp)) - kern = theano.shared(rand(kern_shp)) - out = theano.shared(rand(out_shp)) + img = aesara.shared(rand(img_shp)) + kern = aesara.shared(rand(kern_shp)) + out = aesara.shared(rand(out_shp)) # beta 1 is needed to force the reuse of out, otherwise, it is # replaced by a GpuAllocEmpty o1 = dnn._dnn_conv(img, kern, conv_mode="conv", out=out, beta=1.0) - mode = theano.compile.get_default_mode().including("local_remove_all_assert") - f = theano.function([], [o1], mode=mode) - theano.printing.debugprint(f) + mode = aesara.compile.get_default_mode().including("local_remove_all_assert") + f = aesara.function([], [o1], mode=mode) + aesara.printing.debugprint(f) print("Start computation") for i in range(10000): f.fn() diff --git a/theano/misc/check_blas.py b/aesara/misc/check_blas.py similarity index 90% rename from theano/misc/check_blas.py rename to aesara/misc/check_blas.py index b6ee009f63..b09c7658ca 100644 --- a/theano/misc/check_blas.py +++ b/aesara/misc/check_blas.py @@ -14,15 +14,15 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.tensor.math import dot +import aesara +from aesara.configdefaults import config +from aesara.tensor.math import dot def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order="C"): """ - :param execute: If True, execute a Theano function that should call gemm. - :param verbose: If True, will print some Theano flags and env variables. + :param execute: If True, execute an Aesara function that should call gemm. + :param verbose: If True, will print some Aesara flags and env variables. :param M,N,K: The M,N,K size used by gemm. :param iters: The number of calls to gemm to do. 
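A hedged usage sketch of the benchmark helper whose parameters are documented above, assuming ``execute`` can be imported and called directly (the module is also meant to be run as a script, e.g. via ``check_blas_many.sh`` further down):

.. code-block:: python

    # Smaller-than-default problem size; verbose=True prints the Aesara flags
    # and the NumPy BLAS configuration before timing the gemm calls.
    from aesara.misc.check_blas import execute

    execute(M=512, N=512, K=512, iters=5, verbose=True)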
@@ -31,7 +31,7 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order= """ if verbose: - print("Some Theano flags:") + print("Some Aesara flags:") print(" blas__ldflags=", config.blas__ldflags) print(" compiledir=", config.compiledir) print(" floatX=", config.floatX) @@ -45,16 +45,16 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order= print(" OMP_NUM_THREADS=", os.getenv("OMP_NUM_THREADS")) print(" GOTO_NUM_THREADS=", os.getenv("GOTO_NUM_THREADS")) print() - print("Numpy config: (used when the Theano flag" ' "blas__ldflags" is empty)') + print("Numpy config: (used when the Aesara flag" ' "blas__ldflags" is empty)') np.show_config() print("Numpy dot module:", np.dot.__module__) print("Numpy location:", np.__file__) print("Numpy version:", np.__version__) - a = theano.shared(np.ones((M, N), dtype=config.floatX, order=order)) - b = theano.shared(np.ones((N, K), dtype=config.floatX, order=order)) - c = theano.shared(np.ones((M, K), dtype=config.floatX, order=order)) - f = theano.function([], updates=[(c, 0.4 * c + 0.8 * dot(a, b))]) + a = aesara.shared(np.ones((M, N), dtype=config.floatX, order=order)) + b = aesara.shared(np.ones((N, K), dtype=config.floatX, order=order)) + c = aesara.shared(np.ones((M, K), dtype=config.floatX, order=order)) + f = aesara.function([], updates=[(c, 0.4 * c + 0.8 * dot(a, b))]) if any([x.op.__class__.__name__ == "Gemm" for x in f.maker.fgraph.toposort()]): c_impl = [ @@ -64,13 +64,13 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order= ] assert len(c_impl) == 1 if c_impl[0]: - impl = "CPU (with direct Theano binding to blas)" + impl = "CPU (with direct Aesara binding to blas)" else: - impl = "CPU (without direct Theano binding to blas but with numpy/scipy binding to blas)" + impl = "CPU (without direct Aesara binding to blas but with numpy/scipy binding to blas)" elif any([x.op.__class__.__name__ == "GpuGemm" for x in f.maker.fgraph.toposort()]): impl = "GPU" else: - impl = "ERROR, unable to tell if Theano used the cpu or the gpu:\n" + impl = "ERROR, unable to tell if Aesara used the cpu or the gpu:\n" impl += str(f.maker.fgraph.toposort()) t0 = 0 @@ -78,8 +78,8 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order= f() # Ignore first function call to get representative time. 
if execute: - sync = hasattr(theano, "gpuarray") and isinstance( - c, theano.gpuarray.GpuArraySharedVariable + sync = hasattr(aesara, "gpuarray") and isinstance( + c, aesara.gpuarray.GpuArraySharedVariable ) if sync: # Make sure we don't include the time from the first call diff --git a/theano/misc/check_blas_many.sh b/aesara/misc/check_blas_many.sh similarity index 71% rename from theano/misc/check_blas_many.sh rename to aesara/misc/check_blas_many.sh index ac215a0415..fcdad5ba7f 100755 --- a/theano/misc/check_blas_many.sh +++ b/aesara/misc/check_blas_many.sh @@ -10,12 +10,12 @@ uname -a TIME_PREFIX=time VAR=OMP_NUM_THREADS echo "numpy gemm take=" -THEANO_FLAGS=blas__ldflags= $TIME_PREFIX python misc/check_blas.py --quiet +AESARA_FLAGS=blas__ldflags= $TIME_PREFIX python misc/check_blas.py --quiet for i in 1 2 4 8 do export $VAR=$i x=`$TIME_PREFIX python misc/check_blas.py --quiet` - echo "theano gemm with $VAR=$i took: ${x}s" + echo "aesara gemm with $VAR=$i took: ${x}s" done -#Fred to test distro numpy at LISA: PYTHONPATH=/u/bastienf/repos:/usr/lib64/python2.5/site-packages THEANO_FLAGS=blas__ldflags= OMP_NUM_THREADS=8 time python misc/check_blas.py +#Fred to test distro numpy at LISA: PYTHONPATH=/u/bastienf/repos:/usr/lib64/python2.5/site-packages AESARA_FLAGS=blas__ldflags= OMP_NUM_THREADS=8 time python misc/check_blas.py diff --git a/theano/misc/check_duplicate_key.py b/aesara/misc/check_duplicate_key.py similarity index 98% rename from theano/misc/check_duplicate_key.py rename to aesara/misc/check_duplicate_key.py index f0fa498775..9eb5915c97 100644 --- a/theano/misc/check_duplicate_key.py +++ b/aesara/misc/check_duplicate_key.py @@ -2,7 +2,7 @@ import pickle import sys -from theano.configdefaults import config +from aesara.configdefaults import config DISPLAY_DUPLICATE_KEYS = False diff --git a/theano/misc/check_multi_gpu.py b/aesara/misc/check_multi_gpu.py similarity index 77% rename from theano/misc/check_multi_gpu.py rename to aesara/misc/check_multi_gpu.py index e32c231c8c..92dd07510a 100644 --- a/theano/misc/check_multi_gpu.py +++ b/aesara/misc/check_multi_gpu.py @@ -11,9 +11,9 @@ import numpy as np -import theano -from theano.gpuarray import init_dev -from theano.gpuarray.blas import gpu_dot22 +import aesara +from aesara.gpuarray import init_dev +from aesara.gpuarray.blas import gpu_dot22 def main(dev1, dev2): @@ -22,20 +22,20 @@ def main(dev1, dev2): size = 1024 * 16 data = np.random.randn(size, size).astype("float32") - val1a = theano.shared(data, target="ctx1") - val1b = theano.shared(data, target="ctx1") - val1c = theano.shared(data, target="ctx1") - val1d = theano.shared(data, target="ctx1") - - val2a = theano.shared(data, target="ctx2") - val2b = theano.shared(data, target="ctx2") - - f1 = theano.function([], [gpu_dot22(val1a, val1b), gpu_dot22(val1c, val1d)]) - f2 = theano.function([], [gpu_dot22(val1a, val1b), gpu_dot22(val2a, val2b)]) - f3 = theano.function([], [gpu_dot22(val1a, val1b)]) - f4 = theano.function([], [gpu_dot22(val2a, val2b)]) - f5 = theano.function([], [gpu_dot22(val1a, val1b)[0, 0].transfer("cpu")]) - f6 = theano.function([], [gpu_dot22(val2a, val2b)[0, 0].transfer("cpu")]) + val1a = aesara.shared(data, target="ctx1") + val1b = aesara.shared(data, target="ctx1") + val1c = aesara.shared(data, target="ctx1") + val1d = aesara.shared(data, target="ctx1") + + val2a = aesara.shared(data, target="ctx2") + val2b = aesara.shared(data, target="ctx2") + + f1 = aesara.function([], [gpu_dot22(val1a, val1b), gpu_dot22(val1c, val1d)]) + f2 = aesara.function([], 
[gpu_dot22(val1a, val1b), gpu_dot22(val2a, val2b)]) + f3 = aesara.function([], [gpu_dot22(val1a, val1b)]) + f4 = aesara.function([], [gpu_dot22(val2a, val2b)]) + f5 = aesara.function([], [gpu_dot22(val1a, val1b)[0, 0].transfer("cpu")]) + f6 = aesara.function([], [gpu_dot22(val2a, val2b)[0, 0].transfer("cpu")]) # pre-execute to load code to GPU. r = f1.fn() diff --git a/theano/misc/doubleop.py b/aesara/misc/doubleop.py similarity index 79% rename from theano/misc/doubleop.py rename to aesara/misc/doubleop.py index 2bf5f86605..150d6d4034 100644 --- a/theano/misc/doubleop.py +++ b/aesara/misc/doubleop.py @@ -1,7 +1,7 @@ -# This is the example in the Theano/doc/tutorial/extending_theano.txt -import theano -from theano.graph.basic import Apply -from theano.graph.op import Op +# This is the example in the Aesara/doc/tutorial/extending_aesara.txt +import aesara +from aesara.graph.basic import Apply +from aesara.graph.op import Op class DoubleOp(Op): @@ -25,8 +25,8 @@ class DoubleOp(Op): See Also -------- - :class:`~theano.tensor.elemwise.Elemwise` : You can use this to replace - this example. Just execute `x * 2` with x being a Theano variable. + :class:`~aesara.tensor.elemwise.Elemwise` : You can use this to replace + this example. Just execute `x * 2` with x being an Aesara variable. .. versionadded:: 0.6 @@ -42,7 +42,7 @@ def __str__(self): return self.__class__.__name__ def make_node(self, x): - x = theano.tensor.as_tensor_variable(x) + x = aesara.tensor.as_tensor_variable(x) return Apply(self, [x], [x.type()]) def perform(self, node, inputs, output_storage): diff --git a/theano/misc/elemwise_openmp_speedup.py b/aesara/misc/elemwise_openmp_speedup.py similarity index 90% rename from theano/misc/elemwise_openmp_speedup.py rename to aesara/misc/elemwise_openmp_speedup.py index 96f3d2ce39..4319e40f6b 100644 --- a/theano/misc/elemwise_openmp_speedup.py +++ b/aesara/misc/elemwise_openmp_speedup.py @@ -4,7 +4,7 @@ from locale import getpreferredencoding from optparse import OptionParser -from theano.configdefaults import config +from aesara.configdefaults import config console_encoding = getpreferredencoding() @@ -44,10 +44,10 @@ def runScript(N): if hasattr(options, "help"): print(options.help) sys.exit(0) - orig_flags = os.environ.get("THEANO_FLAGS", "") - os.environ["THEANO_FLAGS"] = orig_flags + ",openmp=false" + orig_flags = os.environ.get("AESARA_FLAGS", "") + os.environ["AESARA_FLAGS"] = orig_flags + ",openmp=false" (cheapTime, costlyTime) = runScript(N=options.N) - os.environ["THEANO_FLAGS"] = orig_flags + ",openmp=true" + os.environ["AESARA_FLAGS"] = orig_flags + ",openmp=true" (cheapTimeOpenmp, costlyTimeOpenmp) = runScript(N=options.N) if cheapTime > cheapTimeOpenmp: diff --git a/theano/misc/elemwise_time_test.py b/aesara/misc/elemwise_time_test.py similarity index 88% rename from theano/misc/elemwise_time_test.py rename to aesara/misc/elemwise_time_test.py index 5ac3f86fbe..60727c563b 100644 --- a/theano/misc/elemwise_time_test.py +++ b/aesara/misc/elemwise_time_test.py @@ -4,10 +4,10 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.tensor.math import tanh -from theano.tensor.type import vector +import aesara +from aesara.configdefaults import config +from aesara.tensor.math import tanh +from aesara.tensor.type import vector parser = OptionParser( @@ -47,8 +47,8 @@ def ElemwiseOpTime(N, script=False, loops=1000): x = vector("x") np.random.seed(1235) v = np.random.random(N).astype(config.floatX) - f = theano.function([x], 2 * x + x * x) - f1 = 
theano.function([x], tanh(x)) + f = aesara.function([x], 2 * x + x * x) + f1 = aesara.function([x], tanh(x)) if not script: if config.openmp: print("With openmp:") diff --git a/theano/misc/frozendict.py b/aesara/misc/frozendict.py similarity index 100% rename from theano/misc/frozendict.py rename to aesara/misc/frozendict.py diff --git a/theano/misc/latence_gpu_transfert.py b/aesara/misc/latence_gpu_transfert.py similarity index 68% rename from theano/misc/latence_gpu_transfert.py rename to aesara/misc/latence_gpu_transfert.py index 05e676e73f..e626728847 100644 --- a/theano/misc/latence_gpu_transfert.py +++ b/aesara/misc/latence_gpu_transfert.py @@ -2,13 +2,13 @@ import numpy as np -import theano +import aesara -y = theano.tensor.type.fvector() -x = theano.shared(np.zeros(1, dtype="float32")) -f1 = theano.function([y], updates={x: y}) -f2 = theano.function([], x.transfer("cpu")) +y = aesara.tensor.type.fvector() +x = aesara.shared(np.zeros(1, dtype="float32")) +f1 = aesara.function([y], updates={x: y}) +f2 = aesara.function([], x.transfer("cpu")) print(f1.maker.fgraph.toposort()) print(f2.maker.fgraph.toposort()) for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]: diff --git a/theano/misc/may_share_memory.py b/aesara/misc/may_share_memory.py similarity index 92% rename from theano/misc/may_share_memory.py rename to aesara/misc/may_share_memory.py index 4417e2d505..b522834e33 100644 --- a/theano/misc/may_share_memory.py +++ b/aesara/misc/may_share_memory.py @@ -6,13 +6,13 @@ import numpy as np -from theano.tensor.type import TensorType +from aesara.tensor.type import TensorType try: import scipy.sparse - from theano.sparse.basic import SparseType + from aesara.sparse.basic import SparseType def _is_sparse(a): return scipy.sparse.issparse(a) @@ -24,7 +24,7 @@ def _is_sparse(a): return False -from theano import gpuarray +from aesara import gpuarray if gpuarray.pygpu: diff --git a/theano/misc/ordered_set.py b/aesara/misc/ordered_set.py similarity index 97% rename from theano/misc/ordered_set.py rename to aesara/misc/ordered_set.py index 9b71902530..397a67f0a6 100644 --- a/theano/misc/ordered_set.py +++ b/aesara/misc/ordered_set.py @@ -4,12 +4,12 @@ def check_deterministic(iterable): - # Most places where OrderedSet is used, theano interprets any exception + # Most places where OrderedSet is used, aesara interprets any exception # whatsoever as a problem that an optimization introduced into the graph. # If I raise a TypeError when the DestoryHandler tries to do something # non-deterministic, it will just result in optimizations getting ignored. # So I must use an assert here. In the long term we should fix the rest of - # theano to use exceptions correctly, so that this can be a TypeError. + # aesara to use exceptions correctly, so that this can be a TypeError. if iterable is not None: if not isinstance( iterable, (list, tuple, OrderedSet, types.GeneratorType, str) @@ -76,7 +76,7 @@ class OrderedSet(MutableSet): # Individual links are kept alive by the hard reference in self.__map. # Those hard references disappear when a key is deleted from an OrderedSet. 
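The elemwise_openmp_speedup.py hunk above relies on the fact that `AESARA_FLAGS` is read when `aesara` is imported, so each timing run happens in a fresh subprocess with the flag appended. A hedged sketch of that pattern; `benchmark.py` is a hypothetical script name used only for illustration:

```python
# Run a benchmark script twice, once with OpenMP disabled and once enabled,
# by appending to AESARA_FLAGS in the child process environment.
import os
import subprocess
import sys


def run_with_flags(extra_flags):
    env = dict(os.environ)
    env["AESARA_FLAGS"] = env.get("AESARA_FLAGS", "") + "," + extra_flags
    result = subprocess.run(
        [sys.executable, "benchmark.py"], env=env, capture_output=True, text=True
    )
    return result.stdout


print("openmp=false:", run_with_flags("openmp=false"))
print("openmp=true: ", run_with_flags("openmp=true"))
```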
- # Added by IG-- pre-existing theano code expected sets + # Added by IG-- pre-existing aesara code expected sets # to have this method def update(self, iterable): check_deterministic(iterable) diff --git a/theano/misc/pkl_utils.py b/aesara/misc/pkl_utils.py similarity index 95% rename from theano/misc/pkl_utils.py rename to aesara/misc/pkl_utils.py index 6ffeaf115a..e31b0da3ab 100644 --- a/theano/misc/pkl_utils.py +++ b/aesara/misc/pkl_utils.py @@ -18,7 +18,7 @@ import numpy as np -import theano +import aesara try: @@ -26,8 +26,8 @@ except ImportError: DEFAULT_PROTOCOL = HIGHEST_PROTOCOL -from theano.compile.sharedvalue import SharedVariable -from theano.configdefaults import config +from aesara.compile.sharedvalue import SharedVariable +from aesara.configdefaults import config __docformat__ = "restructuredtext en" @@ -45,7 +45,7 @@ class StripPickler(Pickler): """ - Subclass of Pickler that strips unnecessary attributes from Theano objects. + Subclass of Pickler that strips unnecessary attributes from Aesara objects. .. versionadded:: 0.8 @@ -70,12 +70,12 @@ def __init__(self, file, protocol=0, extra_tag_to_remove=None): def save(self, obj): # Remove the tag.trace attribute from Variable and Apply nodes - if isinstance(obj, theano.graph.utils.Scratchpad): + if isinstance(obj, aesara.graph.utils.Scratchpad): for tag in self.tag_to_remove: if hasattr(obj, tag): del obj.__dict__[tag] # Remove manually-added docstring of Elemwise ops - elif isinstance(obj, theano.tensor.elemwise.Elemwise): + elif isinstance(obj, aesara.tensor.elemwise.Elemwise): if "__doc__" in obj.__dict__: del obj.__dict__["__doc__"] @@ -123,7 +123,7 @@ def write_array(f): class PersistentGpuArrayID(PersistentNdarrayID): def __call__(self, obj): - from theano.gpuarray.type import _name_for_ctx + from aesara.gpuarray.type import _name_for_ctx try: import pygpu @@ -213,8 +213,8 @@ def __init__(self, zip_file): self.cache = {} def __call__(self, persid): - from theano.gpuarray import pygpu - from theano.gpuarray.type import get_context + from aesara.gpuarray import pygpu + from aesara.gpuarray.type import get_context array_type, name = persid.split(".") @@ -276,9 +276,9 @@ def dump( number of external objects. Note that the zip files are compatible with NumPy's :func:`numpy.load` function. - >>> import theano - >>> foo_1 = theano.shared(0, name='foo') - >>> foo_2 = theano.shared(1, name='foo') + >>> import aesara + >>> foo_1 = aesara.shared(0, name='foo') + >>> foo_2 = aesara.shared(1, name='foo') >>> with open('model.zip', 'wb') as f: ... 
dump((foo_1, foo_2, np.array(2)), f) >>> np.load('model.zip').keys() diff --git a/theano/misc/safe_asarray.py b/aesara/misc/safe_asarray.py similarity index 98% rename from theano/misc/safe_asarray.py rename to aesara/misc/safe_asarray.py index 731086dbfa..f773f30f3f 100644 --- a/theano/misc/safe_asarray.py +++ b/aesara/misc/safe_asarray.py @@ -5,7 +5,7 @@ import numpy as np -from theano.configdefaults import config +from aesara.configdefaults import config __docformat__ = "restructuredtext en" diff --git a/theano/printing.py b/aesara/printing.py similarity index 97% rename from theano/printing.py rename to aesara/printing.py index 4a6fff5e87..8d486c83c6 100644 --- a/theano/printing.py +++ b/aesara/printing.py @@ -14,10 +14,10 @@ import numpy as np -from theano.compile import Function, SharedVariable, debugmode -from theano.compile.io import In, Out -from theano.configdefaults import config -from theano.graph.basic import ( +from aesara.compile import Function, SharedVariable, debugmode +from aesara.compile.io import In, Out +from aesara.configdefaults import config +from aesara.graph.basic import ( Apply, Constant, Node, @@ -25,9 +25,9 @@ graph_inputs, io_toposort, ) -from theano.graph.fg import FunctionGraph -from theano.graph.op import Op -from theano.graph.utils import Scratchpad +from aesara.graph.fg import FunctionGraph +from aesara.graph.op import Op +from aesara.graph.utils import Scratchpad pydot_imported = False @@ -63,7 +63,7 @@ pydot_imported_msg += str(e.args) -_logger = logging.getLogger("theano.printing") +_logger = logging.getLogger("aesara.printing") VALID_ASSOC = {"left", "right", "either"} @@ -80,7 +80,7 @@ def debugprint( ): """Print a computation graph as text to stdout or a file. - :type obj: :class:`~theano.graph.basic.Variable`, Apply, or Function instance + :type obj: :class:`~aesara.graph.basic.Variable`, Apply, or Function instance :param obj: symbolic thing to print :type depth: integer :param depth: print graph to this depth (-1 for unlimited) @@ -101,8 +101,8 @@ def debugprint( Useful to have multiple call to debugprint share the same ids. :type print_storage: bool :param print_storage: If True, this will print the storage map - for Theano functions. Combined with allow_gc=False, after the - execution of a Theano function, we see the intermediate result. + for Aesara functions. Combined with allow_gc=False, after the + execution of an Aesara function, we see the intermediate result. :type used_ids: dict or None :param used_ids: the id to use for some object, but maybe we only referred to it yet. @@ -125,7 +125,7 @@ def debugprint( to the Apply's identifier, to indicate which output a line corresponds to. """ - from theano.scan.op import Scan + from aesara.scan.op import Scan if not isinstance(depth, int): raise Exception("depth parameter must be an int") @@ -726,10 +726,10 @@ def pydotprint( print_output_file=True, return_image=False, ): - """Print to a file the graph of a compiled theano function's ops. Supports + """Print to a file the graph of a compiled aesara function's ops. Supports all pydot output formats, including png and svg. - :param fct: a compiled Theano function, a Variable, an Apply or + :param fct: a compiled Aesara function, a Variable, an Apply or a list of Variable. :param outfile: the output file where to put the graph. :param compact: if True, will remove intermediate var that don't have name. @@ -760,10 +760,10 @@ def pydotprint( .. 
code-block:: python - import theano - v = theano.tensor.vector() + import aesara + v = aesara.tensor.vector() from IPython.display import SVG - SVG(theano.printing.pydotprint(v*2, return_image=True, + SVG(aesara.printing.pydotprint(v*2, return_image=True, format='svg')) In the graph, ellipses are Apply Nodes (the execution of an op) @@ -802,14 +802,14 @@ def pydotprint( scan separately after the top level debugprint output. """ - from theano.scan.op import Scan + from aesara.scan.op import Scan if colorCodes is None: colorCodes = default_colorCodes if outfile is None: outfile = os.path.join( - config.compiledir, "theano.pydotprint." + config.device + "." + format + config.compiledir, "aesara.pydotprint." + config.device + "." + format ) if isinstance(fct, Function): @@ -1342,7 +1342,7 @@ def var_descriptor(obj, _prev_obs=None, _tag_generator=None): def position_independent_str(obj): if isinstance(obj, Variable): - rval = "theano_var" + rval = "aesara_var" rval += "{type=" + str(obj.type) + "}" else: raise NotImplementedError() diff --git a/theano/raise_op.py b/aesara/raise_op.py similarity index 92% rename from theano/raise_op.py rename to aesara/raise_op.py index 2ccabb76c7..94c1ca22c2 100644 --- a/theano/raise_op.py +++ b/aesara/raise_op.py @@ -1,7 +1,7 @@ """Symbolic Op for raising an exception.""" -from theano.graph.basic import Apply -from theano.graph.op import Op +from aesara.graph.basic import Apply +from aesara.graph.op import Op __authors__ = "James Bergstra" "PyMC Dev Team" diff --git a/theano/sandbox/__init__.py b/aesara/sandbox/__init__.py similarity index 100% rename from theano/sandbox/__init__.py rename to aesara/sandbox/__init__.py diff --git a/theano/sandbox/fourier.py b/aesara/sandbox/fourier.py similarity index 91% rename from theano/sandbox/fourier.py rename to aesara/sandbox/fourier.py index 893f832783..56e7a8702a 100644 --- a/theano/sandbox/fourier.py +++ b/aesara/sandbox/fourier.py @@ -10,16 +10,16 @@ import numpy as np import numpy.fft -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.graph.type import generic -from theano.tensor.basic import as_tensor -from theano.tensor.type import zmatrix +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.graph.type import generic +from aesara.tensor.basic import as_tensor +from aesara.tensor.type import zmatrix message = ( - "The module theano.sandbox.fourier will soon be deprecated." - " Please use theano.tensor.fft, which supports gradients and " + "The module aesara.sandbox.fourier will soon be deprecated." + " Please use aesara.tensor.fft, which supports gradients and " "automatic optimization transfers to the GPU ops." 
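The printing utilities renamed above keep the same entry points. A small usage sketch, assuming the `aesara.printing` layout shown in this diff; `debugprint` accepts a Variable, an Apply node, or a compiled Function, as its docstring states:

```python
# Text rendering of a symbolic graph and of a compiled function's graph.
import aesara
from aesara.printing import debugprint

v = aesara.tensor.vector("v")
y = 2 * v + v * v

debugprint(y)            # symbolic graph as built
f = aesara.function([v], y)
debugprint(f)            # optimized graph of the compiled function
```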
) warnings.warn(message) diff --git a/aesara/sandbox/linalg/__init__.py b/aesara/sandbox/linalg/__init__.py new file mode 100644 index 0000000000..85fd0dcf6a --- /dev/null +++ b/aesara/sandbox/linalg/__init__.py @@ -0,0 +1,12 @@ +from aesara.sandbox.linalg.ops import psd, spectral_radius_bound +from aesara.tensor.nlinalg import ( + alloc_diag, + det, + diag, + eig, + eigh, + extract_diag, + matrix_inverse, + trace, +) +from aesara.tensor.slinalg import cholesky, eigvalsh, solve diff --git a/theano/sandbox/linalg/ops.py b/aesara/sandbox/linalg/ops.py similarity index 94% rename from theano/sandbox/linalg/ops.py rename to aesara/sandbox/linalg/ops.py index 732cc28275..4fea1d2e8f 100644 --- a/theano/sandbox/linalg/ops.py +++ b/aesara/sandbox/linalg/ops.py @@ -1,29 +1,29 @@ import logging -import theano.tensor -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.graph.opt import GlobalOptimizer, local_optimizer -from theano.tensor import basic as tt -from theano.tensor.basic_opt import ( +import aesara.tensor +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.graph.opt import GlobalOptimizer, local_optimizer +from aesara.tensor import basic as tt +from aesara.tensor.basic_opt import ( register_canonicalize, register_specialize, register_stabilize, ) -from theano.tensor.blas import Dot22 -from theano.tensor.elemwise import DimShuffle -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import Dot, Prod, dot, log -from theano.tensor.math import pow as tt_pow -from theano.tensor.math import prod -from theano.tensor.nlinalg import ( +from aesara.tensor.blas import Dot22 +from aesara.tensor.elemwise import DimShuffle +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import Dot, Prod, dot, log +from aesara.tensor.math import pow as tt_pow +from aesara.tensor.math import prod +from aesara.tensor.nlinalg import ( MatrixInverse, det, extract_diag, matrix_inverse, trace, ) -from theano.tensor.slinalg import Cholesky, Solve, cholesky, imported_scipy, solve +from aesara.tensor.slinalg import Cholesky, Solve, cholesky, imported_scipy, solve logger = logging.getLogger(__name__) @@ -182,7 +182,7 @@ def apply(self, fgraph): # -1 should make it run right before the first merge -theano.compile.mode.optdb.register( +aesara.compile.mode.optdb.register( "HintsOpt", HintsOptimizer(), -1, "fast_run", "fast_compile" ) diff --git a/theano/sandbox/minimal.py b/aesara/sandbox/minimal.py similarity index 89% rename from theano/sandbox/minimal.py rename to aesara/sandbox/minimal.py index a8394abdc5..f54cacb490 100644 --- a/theano/sandbox/minimal.py +++ b/aesara/sandbox/minimal.py @@ -1,8 +1,8 @@ import numpy as np -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor.type import lscalar +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor.type import lscalar class Minimal(Op): @@ -25,7 +25,7 @@ def __init__(self): super().__init__() def make_node(self, *args): - # HERE `args` must be THEANO VARIABLES + # HERE `args` must be AESARA VARIABLES return Apply(op=self, inputs=args, outputs=[lscalar()]) def perform(self, node, inputs, out_): diff --git a/theano/sandbox/multinomial.py b/aesara/sandbox/multinomial.py similarity index 97% rename from theano/sandbox/multinomial.py rename to aesara/sandbox/multinomial.py index 92c7de0703..161ec5a65c 100644 --- a/theano/sandbox/multinomial.py +++ b/aesara/sandbox/multinomial.py @@ 
-3,12 +3,12 @@ import numpy as np -import theano.tensor as tt -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.op import COp -from theano.scalar import Scalar, as_scalar -from theano.tensor.type import discrete_dtypes +import aesara.tensor as tt +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.op import COp +from aesara.scalar import Scalar, as_scalar +from aesara.tensor.type import discrete_dtypes class MultinomialFromUniform(COp): @@ -72,8 +72,8 @@ def c_code(self, node, name, ins, outs, sub): t = f"PyArray_TYPE({pvals})" else: t = Scalar(self.odtype).dtype_specs()[1] - if t.startswith("theano_complex"): - t = t.replace("theano_complex", "NPY_COMPLEX") + if t.startswith("aesara_complex"): + t = t.replace("aesara_complex", "NPY_COMPLEX") else: t = t.upper() fail = sub["fail"] @@ -264,8 +264,8 @@ def c_code(self, node, name, ins, outs, sub): t = "NPY_INT64" else: t = Scalar(self.odtype).dtype_specs()[1] - if t.startswith("theano_complex"): - t = t.replace("theano_complex", "NPY_COMPLEX") + if t.startswith("aesara_complex"): + t = t.replace("aesara_complex", "NPY_COMPLEX") else: t = t.upper() fail = sub["fail"] diff --git a/theano/sandbox/neighbours.py b/aesara/sandbox/neighbours.py similarity index 51% rename from theano/sandbox/neighbours.py rename to aesara/sandbox/neighbours.py index 18002157a1..a8ca8427bc 100644 --- a/theano/sandbox/neighbours.py +++ b/aesara/sandbox/neighbours.py @@ -1,9 +1,9 @@ """ -Neighbours was moved into theano.tensor.nnet.neighbours. +Neighbours was moved into aesara.tensor.nnet.neighbours. This file was created for compatibility. """ -from theano.tensor.nnet.neighbours import Images2Neibs, images2neibs, neibs2images +from aesara.tensor.nnet.neighbours import Images2Neibs, images2neibs, neibs2images __all__ = ["images2neibs", "neibs2images", "Images2Neibs"] diff --git a/theano/sandbox/rng_mrg.py b/aesara/sandbox/rng_mrg.py similarity index 95% rename from theano/sandbox/rng_mrg.py rename to aesara/sandbox/rng_mrg.py index 3c5f113224..12d53a5843 100644 --- a/theano/sandbox/rng_mrg.py +++ b/aesara/sandbox/rng_mrg.py @@ -1,5 +1,5 @@ """ -Implementation of MRG31k3p random number generator for Theano. +Implementation of MRG31k3p random number generator for Aesara. Generator code in SSJ package (L'Ecuyer & Simard). 
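The rng_mrg.py hunk above documents the MRG31k3p generator. A minimal sketch of drawing samples from it, assuming `MRG_RandomStream` is exposed by `aesara.sandbox.rng_mrg` as it was under the old theano name:

```python
# Draw uniform and normal samples from an MRG31k3p stream and evaluate them.
import aesara
from aesara.sandbox.rng_mrg import MRG_RandomStream

srng = MRG_RandomStream(seed=1234)
u = srng.uniform(size=(2, 3))     # uniform in [0, 1)
n = srng.normal(size=(2, 3))      # standard normal samples

f = aesara.function([], [u, n])
uniform_draw, normal_draw = f()
print(uniform_draw.shape, normal_draw.shape)
```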
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html @@ -17,24 +17,24 @@ import numpy as np -from theano import function, gradient -from theano import scalar as ts -from theano import shared -from theano import tensor as tt -from theano.compile import optdb -from theano.configdefaults import config -from theano.gradient import undefined_grad -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import COp, Op -from theano.graph.opt import in2out, local_optimizer -from theano.graph.params_type import ParamsType -from theano.sandbox import multinomial -from theano.scalar import bool as bool_t -from theano.scalar import int32 as int_t -from theano.tensor import as_tensor_variable, cast, get_vector_length -from theano.tensor.math import cos, log, prod, sin, sqrt -from theano.tensor.shape import reshape -from theano.tensor.type import TensorType, iscalar, ivector, lmatrix +from aesara import function, gradient +from aesara import scalar as ts +from aesara import shared +from aesara import tensor as tt +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.gradient import undefined_grad +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import COp, Op +from aesara.graph.opt import in2out, local_optimizer +from aesara.graph.params_type import ParamsType +from aesara.sandbox import multinomial +from aesara.scalar import bool as bool_t +from aesara.scalar import int32 as int_t +from aesara.tensor import as_tensor_variable, cast, get_vector_length +from aesara.tensor.math import cos, log, prod, sin, sqrt +from aesara.tensor.shape import reshape +from aesara.tensor.type import TensorType, iscalar, ivector, lmatrix def matVecModM(A, s, m): @@ -67,7 +67,7 @@ def multMatVect(v, A, m1, B, m2): [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False ) - # This way of calling the Theano fct is done to bypass Theano overhead. + # This way of calling the Aesara fct is done to bypass Aesara overhead. f = multMatVect.dot_modulo f.input_storage[0].storage[0] = A f.input_storage[1].storage[0] = v[:3] @@ -817,7 +817,7 @@ def get_substream_rstates(self, n_streams, dtype, inc_rstate=True): if multMatVect.dot_modulo is None: multMatVect(rval[0], A1p72, M1, A2p72, M2) - # This way of calling the Theano fct is done to bypass Theano overhead. + # This way of calling the Aesara fct is done to bypass Aesara overhead. f = multMatVect.dot_modulo f.input_storage[0].storage[0] = A1p72 f.input_storage[2].storage[0] = M1 @@ -871,8 +871,8 @@ def uniform( If the ``dtype`` arg is provided, ``high`` will be cast into dtype. This bound is excluded. size - Can be a list of integer or Theano variable (ex: the shape - of other Theano Variable). + Can be a list of integer or Aesara variable (ex: the shape + of other Aesara Variable). dtype The output data type. 
If dtype is not specified, it will be inferred from the dtype of low and high, but will be at @@ -892,7 +892,7 @@ def uniform( high = undefined_grad(high) if isinstance(size, tuple): - msg = "size must be a tuple of int or a Theano variable" + msg = "size must be a tuple of int or an Aesara variable" assert all([isinstance(i, (np.integer, int, Variable)) for i in size]), msg if any([isinstance(i, (np.integer, int)) and i <= 0 for i in size]): raise ValueError( @@ -902,7 +902,7 @@ def uniform( else: if not (isinstance(size, Variable) and size.ndim == 1): raise TypeError( - "size must be a tuple of int or a Theano " + "size must be a tuple of int or an Aesara " "Variable with 1 dimension, got " + str(size) + " of type " @@ -1043,7 +1043,7 @@ def choice( replace: bool (default True) Whether the sample is with or without replacement. Only replace=False is implemented for now. - p: 2d numpy array or theano tensor + p: 2d numpy array or aesara tensor the probabilities of the distribution, corresponding to values 0 to `p.shape[1]-1`. @@ -1111,7 +1111,7 @@ def multinomial_wo_replacement( warnings.warn( "MRG_RandomStream.multinomial_wo_replacement() is " "deprecated and will be removed in the next release of " - "Theano. Please use MRG_RandomStream.choice() instead." + "Aesara. Please use MRG_RandomStream.choice() instead." ) assert size is None return self.choice( @@ -1162,7 +1162,7 @@ def normal( Returns ------- samples : TensorVariable - A Theano tensor of samples randomly drawn from a normal distribution. + An Aesara tensor of samples randomly drawn from a normal distribution. """ size = _check_size(size) @@ -1268,7 +1268,7 @@ def truncated_normal( Returns ------- samples : TensorVariable - A Theano tensor of samples randomly drawn from a truncated normal distribution. + An Aesara tensor of samples randomly drawn from a truncated normal distribution. See Also -------- @@ -1290,19 +1290,19 @@ def truncated_normal( def _check_size(size): """ - Canonicalise inputs to get valid output sizes for Theano tensors. + Canonicalise inputs to get valid output sizes for Aesara tensors. Parameters ---------- size : int_vector_like - Some variable that could serve as the shape for a Theano tensor. + Some variable that could serve as the shape for an Aesara tensor. This can be an int, a tuple of ints, a list of ints - or a Theano Variable with similar properties. + or an Aesara Variable with similar properties. Returns ------- size_var : int_vector - A one-dimensional Theano variable encapsulating the given size. + A one-dimensional Aesara variable encapsulating the given size.
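The `size` conventions documented above accept either a tuple of ints or a one-dimensional integer variable such as another variable's shape. A brief sketch under the same assumptions as the previous example:

```python
# Noise whose shape follows a symbolic input: `size` is given as x.shape,
# a 1-d integer vector, as permitted by the docstring above.
import aesara
import aesara.tensor as tt
from aesara.sandbox.rng_mrg import MRG_RandomStream

srng = MRG_RandomStream(seed=42)
x = tt.matrix("x")

noise = srng.uniform(size=x.shape, low=-1.0, high=1.0)
f = aesara.function([x], x + noise)
```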
Raises ------ @@ -1317,18 +1317,18 @@ def _check_size(size): return tt.stack([size], ndim=1) else: raise ValueError( - "Theano variable must have 1 dimension to be a valid size.", size + "Aesara variable must have 1 dimension to be a valid size.", size ) elif isinstance(size, (np.integer, int)): return tt.constant([size], ndim=1) elif not isinstance(size, (tuple, list)): - raise ValueError("Size must be a int, tuple, list or Theano variable.", size) + raise ValueError("Size must be a int, tuple, list or Aesara variable.", size) # check entries of list or tuple for i in size: if isinstance(i, Variable): if i.ndim != 0: - raise ValueError("Non-scalar Theano variable in size", size, i) + raise ValueError("Non-scalar Aesara variable in size", size, i) elif isinstance(i, (np.integer, int)): if i <= 0: raise ValueError( @@ -1336,7 +1336,7 @@ def _check_size(size): ) else: raise ValueError( - "Only Theano variables and integers are allowed in a size-tuple.", + "Only Aesara variables and integers are allowed in a size-tuple.", size, i, ) diff --git a/theano/sandbox/samples_MRG31k3p_12_7_5.txt b/aesara/sandbox/samples_MRG31k3p_12_7_5.txt similarity index 100% rename from theano/sandbox/samples_MRG31k3p_12_7_5.txt rename to aesara/sandbox/samples_MRG31k3p_12_7_5.txt diff --git a/aesara/sandbox/solve.py b/aesara/sandbox/solve.py new file mode 100644 index 0000000000..ef9022310f --- /dev/null +++ b/aesara/sandbox/solve.py @@ -0,0 +1,11 @@ +import warnings + + +from aesara.tensor.slinalg import solve # noqa + +message = ( + "The module aesara.sandbox.solve will soon be deprecated.\n" + "Please use tensor.slinalg.solve instead." +) + +warnings.warn(message) diff --git a/theano/scalar/__init__.py b/aesara/scalar/__init__.py similarity index 100% rename from theano/scalar/__init__.py rename to aesara/scalar/__init__.py diff --git a/theano/scalar/basic.py b/aesara/scalar/basic.py similarity index 98% rename from theano/scalar/basic.py rename to aesara/scalar/basic.py index 6e3982d149..f7e4089c0d 100644 --- a/theano/scalar/basic.py +++ b/aesara/scalar/basic.py @@ -1,13 +1,13 @@ """ .. warning:: -This directory is for the internal of Theano. +This directory is for the internal of Aesara. You are strongly advised not to use it, except if you know what you are doing! -If you want to use a scalar variable in a Theano graph, -you probably want to use theano.tensor.[c,z,f,d,b,w,i,l,]scalar! +If you want to use a scalar variable in an Aesara graph, +you probably want to use aesara.tensor.[c,z,f,d,b,w,i,l,]scalar! 
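As the module warning above says, user-level graphs should use the tensor-level scalar constructors rather than the internals of `aesara.scalar`. A brief sketch; `dscalar` is assumed to exist alongside the `iscalar`/`lscalar` constructors imported elsewhere in this diff:

```python
# 0-d tensors built with the recommended aesara.tensor constructors.
import aesara
from aesara.tensor.type import dscalar, iscalar

x = dscalar("x")            # 0-d float64 tensor
n = iscalar("n")            # 0-d int32 tensor
f = aesara.function([x, n], x ** n)
print(f(2.0, 3))            # 8.0
```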
""" import math @@ -18,19 +18,19 @@ import numpy as np -import theano -from theano import printing -from theano.configdefaults import config -from theano.gradient import DisconnectedType, grad_undefined -from theano.graph.basic import Apply, Constant, Variable, clone, list_of_nodes -from theano.graph.fg import FunctionGraph -from theano.graph.op import COp -from theano.graph.opt import MergeOptimizer -from theano.graph.type import CType -from theano.graph.utils import MetaObject, MethodNotDefined -from theano.misc.safe_asarray import _asarray -from theano.printing import pprint -from theano.utils import ( +import aesara +from aesara import printing +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType, grad_undefined +from aesara.graph.basic import Apply, Constant, Variable, clone, list_of_nodes +from aesara.graph.fg import FunctionGraph +from aesara.graph.op import COp +from aesara.graph.opt import MergeOptimizer +from aesara.graph.type import CType +from aesara.graph.utils import MetaObject, MethodNotDefined +from aesara.misc.safe_asarray import _asarray +from aesara.printing import pprint +from aesara.utils import ( apply_across_args, difference, from_return_values, @@ -94,7 +94,7 @@ def make_array(dt): def as_common_dtype(*vars): """ - For for theano.scalar.Scalar and TensorVariable. + For for aesara.scalar.Scalar and TensorVariable. """ dtype = upcast(*[v.dtype for v in vars]) return (v.astype(dtype) for v in vars) @@ -116,8 +116,8 @@ def get_scalar_type(dtype): def as_scalar(x, name=None): - from theano.tensor.basic import scalar_from_tensor - from theano.tensor.type import TensorType + from aesara.tensor.basic import scalar_from_tensor + from aesara.tensor.type import TensorType if isinstance(x, Apply): if len(x.outputs) != 1: @@ -414,7 +414,7 @@ def dtype_specs(self): # Windows and Linux. # NOTE: equivalent type on a platform can have different typenum. # This is the source of all dtype/typenum problem found up to - # now, as Theano always expect the exact typenum that + # now, as Aesara always expect the exact typenum that # correspond to our supported dtype. """ for dtype in ['bool', 'int8', 'uint8', 'short', 'ushort', 'intc', @@ -431,8 +431,8 @@ def dtype_specs(self): "float16": (np.float16, "npy_float16", "Float16"), "float32": (np.float32, "npy_float32", "Float32"), "float64": (np.float64, "npy_float64", "Float64"), - "complex128": (np.complex128, "theano_complex128", "Complex128"), - "complex64": (np.complex64, "theano_complex64", "Complex64"), + "complex128": (np.complex128, "aesara_complex128", "Complex128"), + "complex64": (np.complex64, "aesara_complex64", "Complex64"), "bool": (np.bool_, "npy_bool", "Bool"), "uint8": (np.uint8, "npy_uint8", "UInt8"), "int8": (np.int8, "npy_int8", "Int8"), @@ -536,7 +536,7 @@ def c_cleanup(self, name, sub): def c_support_code(self, **kwargs): if self.dtype.startswith("complex"): - cplx_types = ["theano_complex64", "theano_complex128"] + cplx_types = ["aesara_complex64", "aesara_complex128"] real_types = [ "npy_int8", "npy_int16", @@ -547,15 +547,15 @@ def c_support_code(self, **kwargs): ] # If the 'int' C type is not exactly the same as an existing # 'npy_intX', some C code may not compile, e.g. when assigning - # the value 0 (cast to 'int' in C) to a theano_complex64. + # the value 0 (cast to 'int' in C) to an Aesara_complex64. if np.dtype("intc").num not in [np.dtype(d[4:]).num for d in real_types]: # In that case we add the 'int' type to the real types. 
real_types.append("int") template = """ - struct theano_complex%(nbits)s : public npy_complex%(nbits)s + struct aesara_complex%(nbits)s : public npy_complex%(nbits)s { - typedef theano_complex%(nbits)s complex_type; + typedef aesara_complex%(nbits)s complex_type; typedef npy_float%(half_nbits)s scalar_type; complex_type operator +(const complex_type &y) const { @@ -599,13 +599,13 @@ def c_support_code(self, **kwargs): template complex_type& operator =(const T& y); - theano_complex%(nbits)s() {} + aesara_complex%(nbits)s() {} template - theano_complex%(nbits)s(const T& y) { *this = y; } + aesara_complex%(nbits)s(const T& y) { *this = y; } template - theano_complex%(nbits)s(const TR& r, const TI& i) { this->real=r; this->imag=i; } + aesara_complex%(nbits)s(const TR& r, const TI& i) { this->real=r; this->imag=i; } }; """ @@ -708,7 +708,7 @@ def get_size(self, shape_info): # Register C code for ViewOp on Scalars. -theano.compile.register_view_op_c_code( +aesara.compile.register_view_op_c_code( Scalar, """ %(oname)s = %(iname)s; @@ -2008,7 +2008,7 @@ def grad(self, inputs, gout): class IntDiv(BinaryScalarOp): nfunc_spec = ("floor_divide", 2, 1) complex_error = ComplexError( - "Theano does not support integer division (//) on " + "Aesara does not support integer division (//) on " "complex numbers, since numpy deprecated it." ) @@ -2019,7 +2019,7 @@ def c_support_code(self, **kwargs): # We use a macro as python use % as a special string character, # and the output of c_code may be run through another level # of string formatting. - return "#define THEANO_MACRO_MOD(x,y) (x % y)" + return "#define AESARA_MACRO_MOD(x,y) (x % y)" def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs @@ -2030,9 +2030,9 @@ def c_code(self, node, name, inputs, outputs, sub): if t in map(str, discrete_types): x_div_y_pp = f"({x} / {y})" x_div_y_mp = f"((-{x}) / {y})" - x_mod_y_mp = f"THEANO_MACRO_MOD((-{x}), {y})" + x_mod_y_mp = f"AESARA_MACRO_MOD((-{x}), {y})" x_div_y_pm = f"({x} / (-{y}))" - x_mod_y_pm = f"THEANO_MACRO_MOD({x}, (-{y}))" + x_mod_y_pm = f"AESARA_MACRO_MOD({x}, (-{y}))" x_div_y_mm = f"((-{x}) / (-{y}))" # If we are in a gpuarray kernel, %(fail)s exits the kernel, # and we do not have any error report, and we cannot set @@ -2116,7 +2116,7 @@ def mod_check(x, y): class Mod(BinaryScalarOp): nfunc_spec = ("mod", 2, 1) complex_error = ComplexError( - "Theano does not support the mod operator (%) on " + "Aesara does not support the mod operator (%) on " "complex numbers, since numpy deprecated it." ) @@ -2132,7 +2132,7 @@ def c_support_code(self, **kwargs): # We use a macro as python use % as a special string character, # and the output of c_code may be run through another level # of string formatting. - return "#define THEANO_MACRO_MOD(x, y) (x % y)" + return "#define AESARA_MACRO_MOD(x, y) (x % y)" def c_code(self, node, name, inputs, outputs, sub): """ @@ -2154,10 +2154,10 @@ def c_code(self, node, name, inputs, outputs, sub): # keep them out of safety, and verify they are useless with an # assert. 
assert str(t) in map(str, discrete_types) - x_mod_y = f"THEANO_MACRO_MOD({x}, {y})" - x_mod_ymm = f"THEANO_MACRO_MOD(-{x}, -{y})" - x_mod_ypm = f"THEANO_MACRO_MOD({x}, -{y})" - x_mod_ymp = f"THEANO_MACRO_MOD(-{x}, {y})" + x_mod_y = f"AESARA_MACRO_MOD({x}, {y})" + x_mod_ymm = f"AESARA_MACRO_MOD(-{x}, -{y})" + x_mod_ypm = f"AESARA_MACRO_MOD({x}, -{y})" + x_mod_ymp = f"AESARA_MACRO_MOD(-{x}, {y})" # If we are in a gpuarray kernel, %(fail)s exits the kernel, # and we do not have any error report, and we cannot set # Python error messages either, so for now we just call the @@ -2804,7 +2804,7 @@ def c_code(self, node, name, inputs, outputs, sub): class Neg(UnaryScalarOp): # We can use numpy.negative here, because even if it gives unexpected - # results on Boolean arrays, it will be passed other dtypes as Theano + # results on Boolean arrays, it will be passed other dtypes as Aesara # does not have a Boolean type for tensors. nfunc_spec = ("negative", 1, 1) @@ -3980,7 +3980,7 @@ def init_c_code(self): The result is assigned to `self._c_code`. """ - from theano.link.c.interface import CLinkerType + from aesara.link.c.interface import CLinkerType # It was already called if hasattr(self, "_c_code"): @@ -4143,12 +4143,12 @@ def __init__(self, inputs, outputs): assert len(outputs) == 1 # 1. Create a new graph from inputs up to the # Composite - res = theano.compile.rebuild_collect_shared( + res = aesara.compile.rebuild_collect_shared( inputs=inputs, outputs=outputs[0].owner.inputs, copy_inputs_over=False ) # Clone also the inputs # 2. We continue this partial clone with the graph in # the inner Composite - res2 = theano.compile.rebuild_collect_shared( + res2 = aesara.compile.rebuild_collect_shared( inputs=outputs[0].owner.op.inputs, outputs=outputs[0].owner.op.outputs, replace=dict(zip(outputs[0].owner.op.inputs, res[1])), @@ -4196,7 +4196,7 @@ def make_node(self, *inputs): else: # Make a new op with the right input type. 
assert len(inputs) == self.nin - res = theano.compile.rebuild_collect_shared( + res = aesara.compile.rebuild_collect_shared( self.outputs, replace=dict(zip(self.inputs, inputs)), rebuild_strict=False, diff --git a/theano/scalar/basic_scipy.py b/aesara/scalar/basic_scipy.py similarity index 99% rename from theano/scalar/basic_scipy.py rename to aesara/scalar/basic_scipy.py index 68e555488c..df0dd0ceee 100644 --- a/theano/scalar/basic_scipy.py +++ b/aesara/scalar/basic_scipy.py @@ -8,9 +8,9 @@ import numpy as np -from theano.configdefaults import config -from theano.gradient import grad_not_implemented -from theano.scalar.basic import ( +from aesara.configdefaults import config +from aesara.gradient import grad_not_implemented +from aesara.scalar.basic import ( BinaryScalarOp, UnaryScalarOp, complex_types, diff --git a/theano/scalar/basic_sympy.py b/aesara/scalar/basic_sympy.py similarity index 89% rename from theano/scalar/basic_sympy.py rename to aesara/scalar/basic_sympy.py index 712d0ccdda..0299a03d3e 100644 --- a/theano/scalar/basic_sympy.py +++ b/aesara/scalar/basic_sympy.py @@ -1,6 +1,6 @@ import itertools as it -from theano.scalar.basic import Apply, ScalarOp, as_scalar, float32, float64, int64 +from aesara.scalar.basic import Apply, ScalarOp, as_scalar, float32, float64, int64 imported_sympy = False @@ -22,7 +22,7 @@ def sympy_dtype(expr): return get_default_datatype(expr).cname -def theano_dtype(expr): +def aesara_dtype(expr): return {"double": float64, "float": float32, "int": int64}[sympy_dtype(expr)] @@ -33,15 +33,15 @@ class SymPyCCode(ScalarOp): Examples -------- >>> from sympy.abc import x, y # SymPy Variables - >>> from theano.scalar.basic_sympy import SymPyCCode + >>> from aesara.scalar.basic_sympy import SymPyCCode >>> op = SymPyCCode([x, y], x + y) - >>> from theano.scalar.basic import floats - >>> xt, yt = floats('xy') # Theano variables + >>> from aesara.scalar.basic import floats + >>> xt, yt = floats('xy') # Aesara variables >>> zt = op(xt, yt) - >>> import theano - >>> f = theano.function([xt, yt], zt) + >>> import aesara + >>> f = aesara.function([xt, yt], zt) >>> f(1.0, 2.0) 3.0 @@ -81,7 +81,7 @@ def c_code(self, node, name, input_names, output_names, sub): return f"{y} = {f}({xs});" def output_types_preference(self, *inputs): - return [theano_dtype(self.expr)] + return [aesara_dtype(self.expr)] def make_node(self, *inputs): # TODO: assert input types are correct use get_default_datatype diff --git a/theano/scalar/c_code/gamma.c b/aesara/scalar/c_code/gamma.c similarity index 100% rename from theano/scalar/c_code/gamma.c rename to aesara/scalar/c_code/gamma.c diff --git a/theano/scalar/sharedvar.py b/aesara/scalar/sharedvar.py similarity index 86% rename from theano/scalar/sharedvar.py rename to aesara/scalar/sharedvar.py index 37f9fc6f42..1af32f635d 100644 --- a/theano/scalar/sharedvar.py +++ b/aesara/scalar/sharedvar.py @@ -11,23 +11,18 @@ We don't want to encourage people to use scalars (rather than 0-d tensors), but the reason is just to keep the docs simple, not because scalars are bad. If we just don't register this shared variable constructor to handle any values by -default when calling theano.shared(value) then users must really go out of their +default when calling aesara.shared(value) then users must really go out of their way (as scan does) to create a shared variable of this kind. 
""" import numpy as np -from theano.compile import SharedVariable +from aesara.compile import SharedVariable from .basic import Scalar, _scalar_py_operators -__authors__ = "James Bergstra" -__copyright__ = "(c) 2010, Universite de Montreal" -__license__ = "3-clause BSD License" -__contact__ = "theano-dev " - __docformat__ = "restructuredtext en" diff --git a/theano/scan/__init__.py b/aesara/scan/__init__.py similarity index 84% rename from theano/scan/__init__.py rename to aesara/scan/__init__.py index 6e156f25b6..6ab4491d47 100644 --- a/theano/scan/__init__.py +++ b/aesara/scan/__init__.py @@ -19,7 +19,7 @@ ignores previous steps of the outputs. Often a for-loop or while-loop can be expressed as a ``scan()`` operation, -and ``scan`` is the closest that theano comes to looping. The advantages +and ``scan`` is the closest that aesara comes to looping. The advantages of using ``scan`` over `for` loops in python (amongs other) are: * it allows the number of iterations to be part of the symbolic graph @@ -47,13 +47,13 @@ ) __copyright__ = "(c) 2010, Universite de Montreal" -from theano import configdefaults +from aesara import configdefaults configdefaults.add_scan_configvars() -from theano.scan import opt -from theano.scan.basic import scan -from theano.scan.checkpoints import scan_checkpoints -from theano.scan.utils import until -from theano.scan.views import foldl, foldr, map, reduce +from aesara.scan import opt +from aesara.scan.basic import scan +from aesara.scan.checkpoints import scan_checkpoints +from aesara.scan.utils import until +from aesara.scan.views import foldl, foldr, map, reduce diff --git a/theano/scan/basic.py b/aesara/scan/basic.py similarity index 96% rename from theano/scan/basic.py rename to aesara/scan/basic.py index 72785fb79b..2731c9c1e0 100644 --- a/theano/scan/basic.py +++ b/aesara/scan/basic.py @@ -14,26 +14,26 @@ import numpy as np -import theano.tensor as tt -from theano.compile import SharedVariable, ops -from theano.compile.function import function -from theano.compile.mode import Mode -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable, clone_replace, graph_inputs -from theano.graph.fg import MissingInputError -from theano.graph.op import get_test_value -from theano.graph.utils import TestValueError -from theano.scan import utils -from theano.scan.op import Scan -from theano.scan.utils import safe_new, traverse -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import minimum -from theano.tensor.shape import shape_padleft -from theano.tensor.type import TensorType, integer_dtypes -from theano.updates import OrderedUpdates - - -_logger = logging.getLogger("theano.scan.basic") +import aesara.tensor as tt +from aesara.compile import SharedVariable, ops +from aesara.compile.function import function +from aesara.compile.mode import Mode +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable, clone_replace, graph_inputs +from aesara.graph.fg import MissingInputError +from aesara.graph.op import get_test_value +from aesara.graph.utils import TestValueError +from aesara.scan import utils +from aesara.scan.op import Scan +from aesara.scan.utils import safe_new, traverse +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import minimum +from aesara.tensor.shape import shape_padleft +from aesara.tensor.type import TensorType, integer_dtypes +from aesara.updates import OrderedUpdates + + +_logger = 
logging.getLogger("aesara.scan.basic") def scan( @@ -59,7 +59,7 @@ def scan( fn ``fn`` is a function that describes the operations involved in one step of ``scan``. ``fn`` should construct variables describing the - output of one iteration step. It should expect as input theano + output of one iteration step. It should expect as input aesara variables representing all the slices of the input sequences and previous values of the outputs, as well as all other arguments given to scan as ``non_sequences``. The order in which scan passes @@ -115,7 +115,7 @@ def scan( .. code-block:: python - import theano.tensor as tt + import aesara.tensor as tt W = tt.matrix() W_2 = W**2 def f(x): @@ -148,13 +148,13 @@ def f(x): passed (and it is used to allocate memory if needed). = {}): sequences - ``sequences`` is the list of Theano variables or dictionaries + ``sequences`` is the list of Aesara variables or dictionaries describing the sequences ``scan`` has to iterate over. If a sequence is given as wrapped in a dictionary, then a set of optional information can be provided about the sequence. The dictionary should have the following keys: - * ``input`` (*mandatory*) -- Theano variable representing the + * ``input`` (*mandatory*) -- Aesara variable representing the sequence. * ``taps`` -- Temporal taps of the sequence required by ``fn``. @@ -162,18 +162,18 @@ def f(x): impiles that at iteration step ``t`` scan will pass to ``fn`` the slice ``t+k``. Default value is ``[0]`` - Any Theano variable in the list ``sequences`` is automatically + Any Aesara variable in the list ``sequences`` is automatically wrapped into a dictionary where ``taps`` is set to ``[0]`` outputs_info - ``outputs_info`` is the list of Theano variables or dictionaries + ``outputs_info`` is the list of Aesara variables or dictionaries describing the initial state of the outputs computed recurrently. When this initial states are given as dictionary optional information can be provided about the output corresponding to these initial states. The dictionary should have the following keys: - * ``initial`` -- Theano variable that represents the initial + * ``initial`` -- Aesara variable that represents the initial state of a given output. In case the output is not computed recursively (think of a map) and does not require an initial state this field can be skipped. Given that (only) the previous @@ -231,7 +231,7 @@ def f(x): n_steps ``n_steps`` is the number of steps to iterate given as an int - or Theano scalar. If any of the input sequences do not have + or Aesara scalar. If any of the input sequences do not have enough elements, scan will raise an error. If the *value is 0* the outputs will have *0 rows*. If n_steps is not provided, ``scan`` will figure out the amount of steps it should run given its input @@ -266,7 +266,7 @@ def f(x): be accurate). If you prefer the computations of one step of ``scan`` to be done differently then the entire function, you can use this parameter to describe how the computations in this - loop are done (see ``theano.function`` for details about + loop are done (see ``aesara.function`` for details about possible values and their meaning). profile @@ -283,14 +283,14 @@ def f(x): set to None, this will use the value of config.scan__allow_gc. The full scan behavior related to allocation is determined by - this value and the Theano flag allow_gc. If the flag allow_gc + this value and the Aesara flag allow_gc. 
If the flag allow_gc is True (default) and this scan parameter allow_gc is False (default), then we let scan allocate all intermediate memory on the first iteration, those are not garbage collected them during that first iteration (this is determined by the scan allow_gc). This speed up allocation of the following iteration. But we free all those temp allocation at the end of - all iterations (this is what the Theano flag allow_gc mean). + all iterations (this is what the Aesara flag allow_gc mean). If you use preallocate and this scan is on GPU, the speed up from the scan allow_gc is small. If you are missing memory, @@ -308,11 +308,11 @@ def f(x): ------- tuple Tuple of the form (outputs, updates); ``outputs`` is either a - Theano variable or a list of Theano variables representing the + Aesara variable or a list of Aesara variables representing the outputs of ``scan`` (in the same order as in ``outputs_info``). ``updates`` is a subclass of dictionary specifying the update rules for all shared variables used in scan. - This dictionary should be passed to ``theano.function`` when you compile + This dictionary should be passed to ``aesara.function`` when you compile your function. The change compared to a normal dictionary is that we validate that keys are SharedVariable and addition of those dictionary are validated to be consistent. @@ -446,7 +446,7 @@ def wrap_into_list(x): # for compiling a dummy function (Iteration #1) ## - # create theano inputs for the recursive function + # create aesara inputs for the recursive function # note : this is a first batch of possible inputs that will # be compiled in a dummy function; we used this dummy # function to detect shared variables and their updates @@ -1006,7 +1006,7 @@ def wrap_into_list(x): # the file because that would force on the user some dependencies that we # might do not want to. Currently we are working on removing the # dependencies on sandbox code completeley. - from theano import gpuarray + from aesara import gpuarray if gpuarray.pygpu_activated: # very often we end up in this situation when we want to diff --git a/theano/scan/c_code/scan_perform.c b/aesara/scan/c_code/scan_perform.c similarity index 95% rename from theano/scan/c_code/scan_perform.c rename to aesara/scan/c_code/scan_perform.c index 309958c147..2c26b11465 100644 --- a/theano/scan/c_code/scan_perform.c +++ b/aesara/scan/c_code/scan_perform.c @@ -602,8 +602,8 @@ static CYTHON_INLINE float __PYX_NAN() { #endif #endif -#define __PYX_HAVE__theano__scan__scan_perform -#define __PYX_HAVE_API__theano__scan__scan_perform +#define __PYX_HAVE__aesara__scan__scan_perform +#define __PYX_HAVE_API__aesara__scan__scan_perform /* Early includes */ #include #include @@ -611,7 +611,7 @@ static CYTHON_INLINE float __PYX_NAN() { #include "numpy/ufuncobject.h" /* NumPy API declarations from "numpy/__init__.pxd" */ - + #ifdef _OPENMP #include #endif /* _OPENMP */ @@ -883,17 +883,17 @@ typedef struct { } __Pyx_BufFmt_Context; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":689 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":689 * # in Cython to enable them only on the right systems. 
- * + * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":690 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":690 + * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< * ctypedef npy_int32 int32_t @@ -901,7 +901,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":691 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":691 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -910,7 +910,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":692 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":692 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -919,17 +919,17 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":696 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":696 * #ctypedef npy_int128 int128_t - * + * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":697 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":697 + * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< * ctypedef npy_uint32 uint32_t @@ -937,7 +937,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":698 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":698 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -946,7 +946,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":699 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":699 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -955,17 +955,17 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":703 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":703 * #ctypedef npy_uint128 uint128_t - * + * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< * ctypedef npy_float64 float64_t * #ctypedef npy_float80 float80_t */ typedef npy_float32 __pyx_t_5numpy_float32_t; 
-/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":704 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":704 + * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< * #ctypedef npy_float80 float80_t @@ -973,7 +973,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":713 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":713 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -982,92 +982,92 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":714 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":714 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< * ctypedef npy_longlong longlong_t - * + * */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":715 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":715 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_ulong uint_t */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":717 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":717 * ctypedef npy_longlong longlong_t - * + * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":718 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":718 + * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< * ctypedef npy_ulonglong ulonglong_t - * + * */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":719 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":719 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_intp intp_t */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":721 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":721 * ctypedef npy_ulonglong ulonglong_t - * + * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< * ctypedef npy_uintp uintp_t - * + * */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":722 - * +/* 
"../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":722 + * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_double float_t */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":724 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":724 * ctypedef npy_uintp uintp_t - * + * * ctypedef npy_double float_t # <<<<<<<<<<<<<< * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":725 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":725 + * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< * ctypedef npy_longdouble longdouble_t - * + * */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":726 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":726 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_cfloat cfloat_t */ typedef npy_longdouble __pyx_t_5numpy_longdouble_t; @@ -1098,38 +1098,38 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do /*--- Type declarations ---*/ -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":728 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":728 * ctypedef npy_longdouble longdouble_t - * + * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":729 - * +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":729 + * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< * ctypedef npy_clongdouble clongdouble_t - * + * */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":730 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":730 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_cdouble complex_t */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":732 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":732 * ctypedef npy_clongdouble clongdouble_t - * + * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew1(a): */ typedef npy_cdouble __pyx_t_5numpy_complex_t; @@ -1771,13 +1771,13 @@ static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0; static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; -/* Module declarations from 'theano.scan.scan_perform' */ +/* Module 
declarations from 'aesara.scan.scan_perform' */ static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t = { "int32_t", NULL, sizeof(__pyx_t_5numpy_int32_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_int32_t) ? 'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_int32_t), 0 }; -#define __Pyx_MODULE_NAME "theano.scan.scan_perform" -extern int __pyx_module_is_main_theano__scan__scan_perform; -int __pyx_module_is_main_theano__scan__scan_perform = 0; +#define __Pyx_MODULE_NAME "aesara.scan.scan_perform" +extern int __pyx_module_is_main_aesara__scan__scan_perform; +int __pyx_module_is_main_aesara__scan__scan_perform = 0; -/* Implementation of 'theano.scan.scan_perform' */ +/* Implementation of 'aesara.scan.scan_perform' */ static PyObject *__pyx_builtin_IndexError; static PyObject *__pyx_builtin_NotImplementedError; static PyObject *__pyx_builtin_range; @@ -1900,7 +1900,7 @@ static const char __pyx_k_scan_perform_pyx[] = "scan_perform.pyx"; static const char __pyx_k_mitmot_inp_offset[] = "mitmot_inp_offset"; static const char __pyx_k_position_of_error[] = "position_of_error"; static const char __pyx_k_shared_arg_offset[] = "shared_arg_offset"; -static const char __pyx_k_theano_link_utils[] = "theano.link.utils"; +static const char __pyx_k_aesara_link_utils[] = "aesara.link.utils"; static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; static const char __pyx_k_len_output_storage[] = "len_output_storage"; static const char __pyx_k_mit_mot_out_slices[] = "mit_mot_out_slices"; @@ -1912,12 +1912,12 @@ static const char __pyx_k_mit_mot_out_nslices[] = "mit_mot_out_nslices"; static const char __pyx_k_mitmots_preallocated[] = "mitmots_preallocated"; static const char __pyx_k_old_mitmot_input_data[] = "old_mitmot_input_data"; static const char __pyx_k_old_mitmot_input_storage[] = "old_mitmot_input_storage"; -static const char __pyx_k_theano_scan_scan_perform[] = "theano.scan.scan_perform"; +static const char __pyx_k_aesara_scan_scan_perform[] = "aesara.scan.scan_perform"; static const char __pyx_k_Razvan_PascanuPyMC_Developers[] = "Razvan PascanuPyMC Developers"; static const char __pyx_k_c_2011_Universite_de_Montreal[] = "(c) 2011, Universite de Montreal"; static const char __pyx_k_This_code_implements_the_operat[] = "\n This code implements the operations that scan has to carry on when called\n as a stand alone function.\n\n IF anything this is the entire code that needs to be transported to C.\n\n Short description of how this code works:\n Scan divides its inputs ( Op's inputs) into different classes of inputs\n as follows:\n i) sequences : inputs over which scan loops to get data. Nothing is\n written into them ( they are readonly, loop over)\n\n ii) mit_mot : multiple input taps multiple output taps arguments.\n These are inputs over which scan loops and gets data but into which\n scan also writes data. The shorthand mit_mot describes how scan\n deal with them at each step : at each step take several slices as\n input and produce sevaral slices as outputs\n\n iii) mit_sot : multiple input taps single output tap arguments.\n As before scan reads from these but also writes. 
At each step scan\n uses several slices as input but produces only one as output\n\n iv) sit_sot : single input tap single output tap arguments.\n At each step use only the previous slice as input, produce only one\n slice as output\n\n v) nit_sot: no input tap single output tap arguments.\n At each step don't use any previous values, only produce new onese\n\n vi) shared_outs: arguments corresponding to shared variables with\n updates.\n At each step use its value as input, and afterwards replace it with\n a new value.\n vii) other_args: arguments that are passed to every call of the\n inner function as they are ( no slicing is perfomed)\n\n All these outputs are one after the other in the inputs list (named in\n this code as args) in a given order ( namely the one described above\n with little discrepencies depending if we are talking about the outputs\n of the Scan op or the inputs of the Scan op Node, and if we are tal""king\n about the inputs of the inner function of scan or of the scan op).\n\n Because of this, all we need to be able to separate and tell arguments\n apart is how many of which we have as well as how many taps and which\n ones (where applicable). All this information is desribed (more or less)\n by describing the arguments of this function)\n"; static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; -static const char __pyx_k_An_output_of_the_scan_has_change[] = "An output of the scan has changed shape. This may be caused by a pushout optimization. Try adding 'optimizer_excluding=scanOp_pushout_output' to your Theano flags."; +static const char __pyx_k_An_output_of_the_scan_has_change[] = "An output of the scan has changed shape. This may be caused by a pushout optimization. Try adding 'optimizer_excluding=scanOp_pushout_output' to your Aesara flags."; static const char __pyx_k_Scan_was_asked_to_run_for_negati[] = "Scan was asked to run for negative number of step %d"; static const char __pyx_k_Sequence_is_shorter_than_the_req[] = "Sequence is shorter than the required number of steps : (n_steps, seq, seq.shape):"; static const char __pyx_k_We_didn_t_implemented_yet_the_ca[] = "We didn't implemented yet the case where scan do 0 iteration"; @@ -2045,8 +2045,8 @@ static PyObject *__pyx_n_s_tap_array; static PyObject *__pyx_n_s_tap_array_len; static PyObject *__pyx_n_s_tdx; static PyObject *__pyx_n_s_test; -static PyObject *__pyx_n_s_theano_link_utils; -static PyObject *__pyx_n_s_theano_scan_scan_perform; +static PyObject *__pyx_n_s_aesara_link_utils; +static PyObject *__pyx_n_s_aesara_scan_scan_perform; static PyObject *__pyx_n_s_thunks; static PyObject *__pyx_n_u_thunks; static PyObject *__pyx_n_s_time; @@ -2062,8 +2062,8 @@ static PyObject *__pyx_n_s_vector_seqs; static PyObject *__pyx_n_s_vm_call_time; static PyObject *__pyx_n_s_xrange; static PyObject *__pyx_n_s_zip; -static PyObject *__pyx_pf_6theano_4scan_12scan_perform_get_version(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ -static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED PyObject *__pyx_self, unsigned int __pyx_v_n_shared_outs, unsigned int __pyx_v_n_mit_mot_outs, unsigned int __pyx_v_n_seqs, unsigned int __pyx_v_n_mit_mot, unsigned int __pyx_v_n_mit_sot, unsigned int __pyx_v_n_sit_sot, unsigned int __pyx_v_n_nit_sot, int __pyx_v_n_steps, int __pyx_v_as_while, PyArrayObject *__pyx_v_mintaps, PyArrayObject *__pyx_v_tap_array, PyArrayObject *__pyx_v_tap_array_len, PyArrayObject *__pyx_v_vector_seqs, PyArrayObject 
*__pyx_v_vector_outs, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_slices, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_nslices, PyArrayObject *__pyx_v_mitmots_preallocated, PyArrayObject *__pyx_v_inps_is_tensor, PyArrayObject *__pyx_v_outs_is_tensor, PyObject *__pyx_v_fn, PyObject *__pyx_v_fnct, PyArrayObject *__pyx_v_destroy_map, PyObject *__pyx_v_args, PyObject *__pyx_v_outs, PyObject *__pyx_v_self, PyObject *__pyx_v_node); /* proto */ +static PyObject *__pyx_pf_6aesara_4scan_12scan_perform_get_version(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ +static PyObject *__pyx_pf_6aesara_4scan_12scan_perform_2perform(CYTHON_UNUSED PyObject *__pyx_self, unsigned int __pyx_v_n_shared_outs, unsigned int __pyx_v_n_mit_mot_outs, unsigned int __pyx_v_n_seqs, unsigned int __pyx_v_n_mit_mot, unsigned int __pyx_v_n_mit_sot, unsigned int __pyx_v_n_sit_sot, unsigned int __pyx_v_n_nit_sot, int __pyx_v_n_steps, int __pyx_v_as_while, PyArrayObject *__pyx_v_mintaps, PyArrayObject *__pyx_v_tap_array, PyArrayObject *__pyx_v_tap_array_len, PyArrayObject *__pyx_v_vector_seqs, PyArrayObject *__pyx_v_vector_outs, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_slices, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_nslices, PyArrayObject *__pyx_v_mitmots_preallocated, PyArrayObject *__pyx_v_inps_is_tensor, PyArrayObject *__pyx_v_outs_is_tensor, PyObject *__pyx_v_fn, PyObject *__pyx_v_fnct, PyArrayObject *__pyx_v_destroy_map, PyObject *__pyx_v_args, PyObject *__pyx_v_outs, PyObject *__pyx_v_self, PyObject *__pyx_v_node); /* proto */ static PyObject *__pyx_float_0_298; static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; @@ -2080,38 +2080,38 @@ static PyObject *__pyx_codeobj__8; static PyObject *__pyx_codeobj__10; /* Late includes */ -/* "theano/scan/scan_perform.pyx":66 - * - * +/* "aesara/scan/scan_perform.pyx":66 + * + * * def get_version(): # <<<<<<<<<<<<<< * return 0.298 - * + * */ /* Python wrapper */ -static PyObject *__pyx_pw_6theano_4scan_12scan_perform_1get_version(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static PyMethodDef __pyx_mdef_6theano_4scan_12scan_perform_1get_version = {"get_version", (PyCFunction)__pyx_pw_6theano_4scan_12scan_perform_1get_version, METH_NOARGS, 0}; -static PyObject *__pyx_pw_6theano_4scan_12scan_perform_1get_version(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_6aesara_4scan_12scan_perform_1get_version(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static PyMethodDef __pyx_mdef_6aesara_4scan_12scan_perform_1get_version = {"get_version", (PyCFunction)__pyx_pw_6aesara_4scan_12scan_perform_1get_version, METH_NOARGS, 0}; +static PyObject *__pyx_pw_6aesara_4scan_12scan_perform_1get_version(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("get_version (wrapper)", 0); - __pyx_r = __pyx_pf_6theano_4scan_12scan_perform_get_version(__pyx_self); + __pyx_r = __pyx_pf_6aesara_4scan_12scan_perform_get_version(__pyx_self); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_6theano_4scan_12scan_perform_get_version(CYTHON_UNUSED PyObject *__pyx_self) { +static PyObject *__pyx_pf_6aesara_4scan_12scan_perform_get_version(CYTHON_UNUSED PyObject *__pyx_self) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("get_version", 0); - /* "theano/scan/scan_perform.pyx":67 - * + /* "aesara/scan/scan_perform.pyx":67 + * * def get_version(): * 
return 0.298 # <<<<<<<<<<<<<< - * + * * @cython.boundscheck(False) */ __Pyx_XDECREF(__pyx_r); @@ -2119,12 +2119,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_get_version(CYTHON_UNUSED __pyx_r = __pyx_float_0_298; goto __pyx_L0; - /* "theano/scan/scan_perform.pyx":66 - * - * + /* "aesara/scan/scan_perform.pyx":66 + * + * * def get_version(): # <<<<<<<<<<<<<< * return 0.298 - * + * */ /* function exit code */ @@ -2134,8 +2134,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_get_version(CYTHON_UNUSED return __pyx_r; } -/* "theano/scan/scan_perform.pyx":70 - * +/* "aesara/scan/scan_perform.pyx":70 + * * @cython.boundscheck(False) * def perform( # <<<<<<<<<<<<<< * unsigned int n_shared_outs, @@ -2143,10 +2143,10 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_get_version(CYTHON_UNUSED */ /* Python wrapper */ -static PyObject *__pyx_pw_6theano_4scan_12scan_perform_3perform(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_6theano_4scan_12scan_perform_2perform[] = "\n Parameters\n ----------\n n_shared_outs: unsigned int\n Number of arugments that correspond to shared variables with\n updates\n n_mit_mot_outs: unsigned int\n Sum over the number of output taps for each mit_mot sequence\n n_seqs: unsigned int\n Number of sequences provided as input\n n_mit_mot : unsigned int\n Number of mit_mot arguemnts\n n_mit_sot: unsigned int\n Number of mit_sot arguments\n n_sit_sot: unsigned int\n Number of sit sot arguemnts\n n_nit_sot: unsigned int\n Number of nit_sot arguments\n n_steps: unsigned int\n Number of steps to loop over\n mintaps: int32 ndarray (can also be a simple python list if that is better !)\n For any of the mit_mot, mit_sot, sit_sot says which is the furtherst\n away input tap from current position. For example, if the taps where [-2,\n -5, -9], the mintap would be -9. For sit_sot this is always -1 since\n is the only allowed tap.\n tap_array: int32 ndarray( can be replaced by a list of list in python if better)\n For each of the mit_mot, mit_sot, sit_sot (the first dimension) says\n which are the corresponding input taps. While this is a matrix, not all\n values in a row are needed and tap_array_len is there to say up to\n which entry we are dealing with valid taps ( afterwards there are\n just 0s to ensure the fix format)\n tap_array_len: int32 ndarray( can be replaced by a list if better)\n For each of the mit_mot, mit_sot, sit_sot says how many input taps\n each has. For sit_sot this will always be 1.\n vector_seqs: int32 ndarray (can be replaced by a list of bools if better)\n For each sequence the corresponding entry is either a 1, is the\n sequence is a vector or 0 if it has more than 1 dimension\n vector_outs: int32 ndarray( can be replaced by list of bools if better)\n For each output ( mit_mot, mit_sot, si""t_sot, nit_sot in this order)\n the entry is 1 if the corresponding argument is a 1 dimensional\n tensor, 0 otherwise.\n mit_mot_out_slices : int32 ndarray( can be replaced by list of lists)\n Same as tap_array, but for the output taps of mit_mot sequences\n mit_mot_out_nslices: int32 ndarray (Can be replaced by a list)\n Same as tap_array_len, but is the number of output taps of the\n mit_mot sequences (i.e. 
it corresponds to mit_mot_out_slices)\n inps_is_tensor : int32 ndarray (Can be replaced by a list)\n Array of boolean indicating, for every input, whether it is a tensor\n or not\n outs_is_tensor : int32 ndarray (Can be replaced by a list)\n Array of boolean indicating, for every output, whether it is a tensor\n or not\n fn: callable\n This is the linker, i.e. the function that will loop over the\n computational graph and call the perform of each operation. For this\n linker there is a c version in graph/lazy_linker.c that will be the\n starting point of implementing this function in C ( we need to take\n all the code around the call of this function and put in C inside\n that code)\n fnct: python object\n Only used to attach some timings for the profile mode ( can be\n skiped if we don't care about Theano's profile mode)\n destroy_map\n Array of boolean saying if an output is computed inplace\n args: list of ndarrays (and random states)\n The inputs of scan in a given order ( n_steps, sequences, mit_mot,\n mit_sot, sit_sot, nit_sot, shared_outs, other_args)\n outs: list of 1 element list ( or storage objects?)\n This is where we need to copy our outputs ( we don't return the\n results, though we can change the code such that we return, and\n figure things out on the outside - python)\n self: python object\n The scan op itself. I only use it to attach to it some timi""ng\n informations .. but I don;t need to.\n\n "; -static PyMethodDef __pyx_mdef_6theano_4scan_12scan_perform_3perform = {"perform", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_6theano_4scan_12scan_perform_3perform, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6theano_4scan_12scan_perform_2perform}; -static PyObject *__pyx_pw_6theano_4scan_12scan_perform_3perform(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_6aesara_4scan_12scan_perform_3perform(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static char __pyx_doc_6aesara_4scan_12scan_perform_2perform[] = "\n Parameters\n ----------\n n_shared_outs: unsigned int\n Number of arugments that correspond to shared variables with\n updates\n n_mit_mot_outs: unsigned int\n Sum over the number of output taps for each mit_mot sequence\n n_seqs: unsigned int\n Number of sequences provided as input\n n_mit_mot : unsigned int\n Number of mit_mot arguemnts\n n_mit_sot: unsigned int\n Number of mit_sot arguments\n n_sit_sot: unsigned int\n Number of sit sot arguemnts\n n_nit_sot: unsigned int\n Number of nit_sot arguments\n n_steps: unsigned int\n Number of steps to loop over\n mintaps: int32 ndarray (can also be a simple python list if that is better !)\n For any of the mit_mot, mit_sot, sit_sot says which is the furtherst\n away input tap from current position. For example, if the taps where [-2,\n -5, -9], the mintap would be -9. For sit_sot this is always -1 since\n is the only allowed tap.\n tap_array: int32 ndarray( can be replaced by a list of list in python if better)\n For each of the mit_mot, mit_sot, sit_sot (the first dimension) says\n which are the corresponding input taps. While this is a matrix, not all\n values in a row are needed and tap_array_len is there to say up to\n which entry we are dealing with valid taps ( afterwards there are\n just 0s to ensure the fix format)\n tap_array_len: int32 ndarray( can be replaced by a list if better)\n For each of the mit_mot, mit_sot, sit_sot says how many input taps\n each has. 
For sit_sot this will always be 1.\n vector_seqs: int32 ndarray (can be replaced by a list of bools if better)\n For each sequence the corresponding entry is either a 1, is the\n sequence is a vector or 0 if it has more than 1 dimension\n vector_outs: int32 ndarray( can be replaced by list of bools if better)\n For each output ( mit_mot, mit_sot, si""t_sot, nit_sot in this order)\n the entry is 1 if the corresponding argument is a 1 dimensional\n tensor, 0 otherwise.\n mit_mot_out_slices : int32 ndarray( can be replaced by list of lists)\n Same as tap_array, but for the output taps of mit_mot sequences\n mit_mot_out_nslices: int32 ndarray (Can be replaced by a list)\n Same as tap_array_len, but is the number of output taps of the\n mit_mot sequences (i.e. it corresponds to mit_mot_out_slices)\n inps_is_tensor : int32 ndarray (Can be replaced by a list)\n Array of boolean indicating, for every input, whether it is a tensor\n or not\n outs_is_tensor : int32 ndarray (Can be replaced by a list)\n Array of boolean indicating, for every output, whether it is a tensor\n or not\n fn: callable\n This is the linker, i.e. the function that will loop over the\n computational graph and call the perform of each operation. For this\n linker there is a c version in graph/lazy_linker.c that will be the\n starting point of implementing this function in C ( we need to take\n all the code around the call of this function and put in C inside\n that code)\n fnct: python object\n Only used to attach some timings for the profile mode ( can be\n skiped if we don't care about Aesara's profile mode)\n destroy_map\n Array of boolean saying if an output is computed inplace\n args: list of ndarrays (and random states)\n The inputs of scan in a given order ( n_steps, sequences, mit_mot,\n mit_sot, sit_sot, nit_sot, shared_outs, other_args)\n outs: list of 1 element list ( or storage objects?)\n This is where we need to copy our outputs ( we don't return the\n results, though we can change the code such that we return, and\n figure things out on the outside - python)\n self: python object\n The scan op itself. I only use it to attach to it some timi""ng\n informations .. 
but I don;t need to.\n\n "; +static PyMethodDef __pyx_mdef_6aesara_4scan_12scan_perform_3perform = {"perform", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_6aesara_4scan_12scan_perform_3perform, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6aesara_4scan_12scan_perform_2perform}; +static PyObject *__pyx_pw_6aesara_4scan_12scan_perform_3perform(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { unsigned int __pyx_v_n_shared_outs; unsigned int __pyx_v_n_mit_mot_outs; unsigned int __pyx_v_n_seqs; @@ -2461,7 +2461,7 @@ static PyObject *__pyx_pw_6theano_4scan_12scan_perform_3perform(PyObject *__pyx_ __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("perform", 1, 26, 26, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 70, __pyx_L3_error) __pyx_L3_error:; - __Pyx_AddTraceback("theano.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("aesara.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; @@ -2476,7 +2476,7 @@ static PyObject *__pyx_pw_6theano_4scan_12scan_perform_3perform(PyObject *__pyx_ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_inps_is_tensor), __pyx_ptype_5numpy_ndarray, 1, "inps_is_tensor", 0))) __PYX_ERR(0, 88, __pyx_L1_error) if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_outs_is_tensor), __pyx_ptype_5numpy_ndarray, 1, "outs_is_tensor", 0))) __PYX_ERR(0, 89, __pyx_L1_error) if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_destroy_map), __pyx_ptype_5numpy_ndarray, 1, "destroy_map", 0))) __PYX_ERR(0, 92, __pyx_L1_error) - __pyx_r = __pyx_pf_6theano_4scan_12scan_perform_2perform(__pyx_self, __pyx_v_n_shared_outs, __pyx_v_n_mit_mot_outs, __pyx_v_n_seqs, __pyx_v_n_mit_mot, __pyx_v_n_mit_sot, __pyx_v_n_sit_sot, __pyx_v_n_nit_sot, __pyx_v_n_steps, __pyx_v_as_while, __pyx_v_mintaps, __pyx_v_tap_array, __pyx_v_tap_array_len, __pyx_v_vector_seqs, __pyx_v_vector_outs, __pyx_v_mit_mot_out_slices, __pyx_v_mit_mot_out_nslices, __pyx_v_mitmots_preallocated, __pyx_v_inps_is_tensor, __pyx_v_outs_is_tensor, __pyx_v_fn, __pyx_v_fnct, __pyx_v_destroy_map, __pyx_v_args, __pyx_v_outs, __pyx_v_self, __pyx_v_node); + __pyx_r = __pyx_pf_6aesara_4scan_12scan_perform_2perform(__pyx_self, __pyx_v_n_shared_outs, __pyx_v_n_mit_mot_outs, __pyx_v_n_seqs, __pyx_v_n_mit_mot, __pyx_v_n_mit_sot, __pyx_v_n_sit_sot, __pyx_v_n_nit_sot, __pyx_v_n_steps, __pyx_v_as_while, __pyx_v_mintaps, __pyx_v_tap_array, __pyx_v_tap_array_len, __pyx_v_vector_seqs, __pyx_v_vector_outs, __pyx_v_mit_mot_out_slices, __pyx_v_mit_mot_out_nslices, __pyx_v_mitmots_preallocated, __pyx_v_inps_is_tensor, __pyx_v_outs_is_tensor, __pyx_v_fn, __pyx_v_fnct, __pyx_v_destroy_map, __pyx_v_args, __pyx_v_outs, __pyx_v_self, __pyx_v_node); /* function exit code */ goto __pyx_L0; @@ -2487,7 +2487,7 @@ static PyObject *__pyx_pw_6theano_4scan_12scan_perform_3perform(PyObject *__pyx_ return __pyx_r; } -static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED PyObject *__pyx_self, unsigned int __pyx_v_n_shared_outs, unsigned int __pyx_v_n_mit_mot_outs, unsigned int __pyx_v_n_seqs, unsigned int __pyx_v_n_mit_mot, unsigned int __pyx_v_n_mit_sot, unsigned int __pyx_v_n_sit_sot, unsigned int __pyx_v_n_nit_sot, int __pyx_v_n_steps, int __pyx_v_as_while, PyArrayObject *__pyx_v_mintaps, PyArrayObject *__pyx_v_tap_array, PyArrayObject *__pyx_v_tap_array_len, PyArrayObject *__pyx_v_vector_seqs, PyArrayObject *__pyx_v_vector_outs, CYTHON_UNUSED PyArrayObject 
*__pyx_v_mit_mot_out_slices, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_nslices, PyArrayObject *__pyx_v_mitmots_preallocated, PyArrayObject *__pyx_v_inps_is_tensor, PyArrayObject *__pyx_v_outs_is_tensor, PyObject *__pyx_v_fn, PyObject *__pyx_v_fnct, PyArrayObject *__pyx_v_destroy_map, PyObject *__pyx_v_args, PyObject *__pyx_v_outs, PyObject *__pyx_v_self, PyObject *__pyx_v_node) { +static PyObject *__pyx_pf_6aesara_4scan_12scan_perform_2perform(CYTHON_UNUSED PyObject *__pyx_self, unsigned int __pyx_v_n_shared_outs, unsigned int __pyx_v_n_mit_mot_outs, unsigned int __pyx_v_n_seqs, unsigned int __pyx_v_n_mit_mot, unsigned int __pyx_v_n_mit_sot, unsigned int __pyx_v_n_sit_sot, unsigned int __pyx_v_n_nit_sot, int __pyx_v_n_steps, int __pyx_v_as_while, PyArrayObject *__pyx_v_mintaps, PyArrayObject *__pyx_v_tap_array, PyArrayObject *__pyx_v_tap_array_len, PyArrayObject *__pyx_v_vector_seqs, PyArrayObject *__pyx_v_vector_outs, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_slices, CYTHON_UNUSED PyArrayObject *__pyx_v_mit_mot_out_nslices, PyArrayObject *__pyx_v_mitmots_preallocated, PyArrayObject *__pyx_v_inps_is_tensor, PyArrayObject *__pyx_v_outs_is_tensor, PyObject *__pyx_v_fn, PyObject *__pyx_v_fnct, PyArrayObject *__pyx_v_destroy_map, PyObject *__pyx_v_args, PyObject *__pyx_v_outs, PyObject *__pyx_v_self, PyObject *__pyx_v_node) { PyObject *__pyx_v_t0_call = NULL; PyObject *__pyx_v_t_fn = NULL; unsigned int __pyx_v_n_outs; @@ -2713,7 +2713,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_pybuffernd_destroy_map.diminfo[0].strides = __pyx_pybuffernd_destroy_map.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_destroy_map.diminfo[0].shape = __pyx_pybuffernd_destroy_map.rcbuffer->pybuffer.shape[0]; - /* "theano/scan/scan_perform.pyx":175 + /* "aesara/scan/scan_perform.pyx":175 * # 1. Unzip the number of steps and sequences. 
If number of steps is * # negative flip sequences around, and make n_steps positive * t0_call = time.time() # <<<<<<<<<<<<<< @@ -2743,7 +2743,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_t0_call = __pyx_t_1; __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":176 + /* "aesara/scan/scan_perform.pyx":176 * # negative flip sequences around, and make n_steps positive * t0_call = time.time() * t_fn = 0 # <<<<<<<<<<<<<< @@ -2753,7 +2753,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_int_0); __pyx_v_t_fn = __pyx_int_0; - /* "theano/scan/scan_perform.pyx":177 + /* "aesara/scan/scan_perform.pyx":177 * t0_call = time.time() * t_fn = 0 * cdef unsigned int n_outs = n_mit_mot + n_mit_sot + n_sit_sot # <<<<<<<<<<<<<< @@ -2762,7 +2762,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_n_outs = ((__pyx_v_n_mit_mot + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot); - /* "theano/scan/scan_perform.pyx":178 + /* "aesara/scan/scan_perform.pyx":178 * t_fn = 0 * cdef unsigned int n_outs = n_mit_mot + n_mit_sot + n_sit_sot * cdef unsigned int seqs_arg_offset = n_seqs + 1 # <<<<<<<<<<<<<< @@ -2771,7 +2771,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_seqs_arg_offset = (__pyx_v_n_seqs + 1); - /* "theano/scan/scan_perform.pyx":180 + /* "aesara/scan/scan_perform.pyx":180 * cdef unsigned int seqs_arg_offset = n_seqs + 1 * cdef unsigned int shared_arg_offset = ( 1 + n_seqs + n_mit_mot + * n_mit_sot + n_sit_sot) # <<<<<<<<<<<<<< @@ -2780,7 +2780,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_shared_arg_offset = ((((1 + __pyx_v_n_seqs) + __pyx_v_n_mit_mot) + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot); - /* "theano/scan/scan_perform.pyx":181 + /* "aesara/scan/scan_perform.pyx":181 * cdef unsigned int shared_arg_offset = ( 1 + n_seqs + n_mit_mot + * n_mit_sot + n_sit_sot) * cdef unsigned int nit_sot_arg_offset = ( shared_arg_offset + # <<<<<<<<<<<<<< @@ -2789,7 +2789,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_nit_sot_arg_offset = (__pyx_v_shared_arg_offset + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":184 + /* "aesara/scan/scan_perform.pyx":184 * n_shared_outs) * cdef unsigned int offset_out * cdef unsigned int lenpos = n_outs + n_nit_sot # <<<<<<<<<<<<<< @@ -2798,7 +2798,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_lenpos = (__pyx_v_n_outs + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":186 + /* "aesara/scan/scan_perform.pyx":186 * cdef unsigned int lenpos = n_outs + n_nit_sot * cdef int pos[500] # put a maximum of 500 outputs * cdef unsigned int len_store_steps = n_mit_mot + n_mit_sot + n_sit_sot + n_nit_sot # <<<<<<<<<<<<<< @@ -2807,18 +2807,18 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_len_store_steps = (((__pyx_v_n_mit_mot + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot) + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":206 + /* "aesara/scan/scan_perform.pyx":206 * cdef int cond * cdef unsigned int len_output_storage = (n_mit_mot_outs + n_mit_sot + * n_sit_sot + n_nit_sot + # <<<<<<<<<<<<<< * n_shared_outs) - * + * */ __pyx_v_len_output_storage = ((((__pyx_v_n_mit_mot_outs + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot) + __pyx_v_n_nit_sot) + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":210 
- * - * + /* "aesara/scan/scan_perform.pyx":210 + * + * * if n_steps < 0: # <<<<<<<<<<<<<< * # History, in the past, this was used for backward * # scan. Now we reverse the inputs outside of scan. @@ -2826,7 +2826,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_n_steps < 0) != 0); if (unlikely(__pyx_t_4)) { - /* "theano/scan/scan_perform.pyx":215 + /* "aesara/scan/scan_perform.pyx":215 * raise IndexError( * "Scan was asked to run for negative number of step %d" % * n_steps) # <<<<<<<<<<<<<< @@ -2836,7 +2836,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_n_steps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 215, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - /* "theano/scan/scan_perform.pyx":214 + /* "aesara/scan/scan_perform.pyx":214 * # scan. Now we reverse the inputs outside of scan. * raise IndexError( * "Scan was asked to run for negative number of step %d" % # <<<<<<<<<<<<<< @@ -2847,7 +2847,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":213 + /* "aesara/scan/scan_perform.pyx":213 * # History, in the past, this was used for backward * # scan. Now we reverse the inputs outside of scan. * raise IndexError( # <<<<<<<<<<<<<< @@ -2861,16 +2861,16 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __PYX_ERR(0, 213, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":210 - * - * + /* "aesara/scan/scan_perform.pyx":210 + * + * * if n_steps < 0: # <<<<<<<<<<<<<< * # History, in the past, this was used for backward * # scan. Now we reverse the inputs outside of scan. 
*/ } - /* "theano/scan/scan_perform.pyx":216 + /* "aesara/scan/scan_perform.pyx":216 * "Scan was asked to run for negative number of step %d" % * n_steps) * elif n_steps == 0: # <<<<<<<<<<<<<< @@ -2880,7 +2880,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_n_steps == 0) != 0); if (unlikely(__pyx_t_4)) { - /* "theano/scan/scan_perform.pyx":217 + /* "aesara/scan/scan_perform.pyx":217 * n_steps) * elif n_steps == 0: * raise NotImplementedError( # <<<<<<<<<<<<<< @@ -2893,7 +2893,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __PYX_ERR(0, 217, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":216 + /* "aesara/scan/scan_perform.pyx":216 * "Scan was asked to run for negative number of step %d" % * n_steps) * elif n_steps == 0: # <<<<<<<<<<<<<< @@ -2902,7 +2902,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":220 + /* "aesara/scan/scan_perform.pyx":220 * "We didn't implemented yet the case where scan do 0 iteration") * else: * for idx in range(n_seqs): # <<<<<<<<<<<<<< @@ -2915,7 +2915,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":221 + /* "aesara/scan/scan_perform.pyx":221 * else: * for idx in range(n_seqs): * if args[(1+idx)].shape[0] < n_steps: # <<<<<<<<<<<<<< @@ -2940,7 +2940,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (unlikely(__pyx_t_4)) { - /* "theano/scan/scan_perform.pyx":224 + /* "aesara/scan/scan_perform.pyx":224 * raise ValueError(('Sequence is shorter than the required ' * 'number of steps : (n_steps, seq, ' * 'seq.shape):'), n_steps, # <<<<<<<<<<<<<< @@ -2950,7 +2950,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_n_steps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 224, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - /* "theano/scan/scan_perform.pyx":225 + /* "aesara/scan/scan_perform.pyx":225 * 'number of steps : (n_steps, seq, ' * 'seq.shape):'), n_steps, * args[1+idx], # <<<<<<<<<<<<<< @@ -2961,7 +2961,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_args, __pyx_t_9, long, 1, __Pyx_PyInt_From_long, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 225, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - /* "theano/scan/scan_perform.pyx":226 + /* "aesara/scan/scan_perform.pyx":226 * 'seq.shape):'), n_steps, * args[1+idx], * args[1+idx].shape) # <<<<<<<<<<<<<< @@ -2975,7 +2975,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":222 + /* "aesara/scan/scan_perform.pyx":222 * for idx in range(n_seqs): * if args[(1+idx)].shape[0] < n_steps: * raise ValueError(('Sequence is shorter than the required ' # <<<<<<<<<<<<<< @@ -3003,7 +3003,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __PYX_ERR(0, 222, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":221 + /* "aesara/scan/scan_perform.pyx":221 * else: * for idx in range(n_seqs): * if args[(1+idx)].shape[0] < n_steps: # <<<<<<<<<<<<<< 
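
For readers trying to connect the mit_sot / tap bookkeeping described in the embedded docstrings above to the public interface, the following is a minimal, illustrative sketch only — it is not part of this changeset. It assumes the standard aesara.scan and aesara.function API; the names x0 and fib and the Fibonacci recurrence are hypothetical examples chosen to show a single output with input taps.

import numpy as np
import aesara
import aesara.tensor as at

# One mit_sot-style output with input taps [-2, -1]: each step receives the
# two previous output slices, so the "mintap" is -2 and the initial state
# must supply at least two values.
x0 = at.vector("x0")

outputs, updates = aesara.scan(
    fn=lambda f_tm2, f_tm1: f_tm2 + f_tm1,           # inner function: one argument per tap
    outputs_info=[dict(initial=x0, taps=[-2, -1])],   # taps as described in the docstring above
    n_steps=10,
)

fib = aesara.function([x0], outputs)
print(fib(np.asarray([0.0, 1.0], dtype=aesara.config.floatX)))  # 1, 2, 3, 5, 8, ...

Inputs passed via sequences= correspond to the read-only "sequences" class in the docstring, while taps on outputs_info entries give rise to the mit_sot / sit_sot arguments whose offsets this generated code computes.
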
@@ -3014,23 +3014,23 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } } - /* "theano/scan/scan_perform.pyx":231 + /* "aesara/scan/scan_perform.pyx":231 * # pos -- map containing the current position of each output - * + * * for idx in range(n_mit_mot + n_mit_sot + n_sit_sot): # <<<<<<<<<<<<<< * store_steps[idx] = args[(idx+n_seqs+1)].shape[0] - * + * */ __pyx_t_5 = ((__pyx_v_n_mit_mot + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot); __pyx_t_6 = __pyx_t_5; for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":232 - * + /* "aesara/scan/scan_perform.pyx":232 + * * for idx in range(n_mit_mot + n_mit_sot + n_sit_sot): * store_steps[idx] = args[(idx+n_seqs+1)].shape[0] # <<<<<<<<<<<<<< - * + * * for idx in range(n_nit_sot): */ __pyx_t_8 = ((unsigned int)((__pyx_v_idx + __pyx_v_n_seqs) + 1)); @@ -3047,9 +3047,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py (__pyx_v_store_steps[((unsigned int)__pyx_v_idx)]) = __pyx_t_11; } - /* "theano/scan/scan_perform.pyx":234 + /* "aesara/scan/scan_perform.pyx":234 * store_steps[idx] = args[(idx+n_seqs+1)].shape[0] - * + * * for idx in range(n_nit_sot): # <<<<<<<<<<<<<< * store_steps[(idx + n_mit_mot + n_mit_sot + n_sit_sot)]=\ * args[(idx + n_mit_mot + n_mit_sot + n_sit_sot @@ -3059,12 +3059,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":236 + /* "aesara/scan/scan_perform.pyx":236 * for idx in range(n_nit_sot): * store_steps[(idx + n_mit_mot + n_mit_sot + n_sit_sot)]=\ * args[(idx + n_mit_mot + n_mit_sot + n_sit_sot # <<<<<<<<<<<<<< * + n_shared_outs + n_seqs+1)] - * + * */ __pyx_t_8 = ((unsigned int)((((((__pyx_v_idx + __pyx_v_n_mit_mot) + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot) + __pyx_v_n_shared_outs) + __pyx_v_n_seqs) + 1)); __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_args, __pyx_t_8, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 236, __pyx_L1_error) @@ -3072,8 +3072,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_11 = __Pyx_PyInt_As_int(__pyx_t_10); if (unlikely((__pyx_t_11 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 236, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":235 - * + /* "aesara/scan/scan_perform.pyx":235 + * * for idx in range(n_nit_sot): * store_steps[(idx + n_mit_mot + n_mit_sot + n_sit_sot)]=\ # <<<<<<<<<<<<<< * args[(idx + n_mit_mot + n_mit_sot + n_sit_sot @@ -3082,24 +3082,24 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py (__pyx_v_store_steps[((unsigned int)(((__pyx_v_idx + __pyx_v_n_mit_mot) + __pyx_v_n_mit_sot) + __pyx_v_n_sit_sot))]) = __pyx_t_11; } - /* "theano/scan/scan_perform.pyx":239 + /* "aesara/scan/scan_perform.pyx":239 * + n_shared_outs + n_seqs+1)] - * + * * for idx in range(n_outs + n_nit_sot): # <<<<<<<<<<<<<< * pos[idx] = (-mintaps[idx])%store_steps[idx] - * + * */ __pyx_t_5 = (__pyx_v_n_outs + __pyx_v_n_nit_sot); __pyx_t_6 = __pyx_t_5; for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":240 - * + /* "aesara/scan/scan_perform.pyx":240 + * * for idx in range(n_outs + n_nit_sot): * pos[idx] = (-mintaps[idx])%store_steps[idx] # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_t_12 = __pyx_v_idx; 
__pyx_t_13 = (-(*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_mintaps.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_mintaps.diminfo[0].strides))); @@ -3110,8 +3110,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py (__pyx_v_pos[__pyx_v_idx]) = __Pyx_mod___pyx_t_5numpy_int32_t(__pyx_t_13, (__pyx_v_store_steps[__pyx_v_idx])); } - /* "theano/scan/scan_perform.pyx":244 - * + /* "aesara/scan/scan_perform.pyx":244 + * * # 2.1 Create storage space for outputs * for idx in range(n_outs): # <<<<<<<<<<<<<< * if destroy_map[idx] != 0: @@ -3122,7 +3122,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":245 + /* "aesara/scan/scan_perform.pyx":245 * # 2.1 Create storage space for outputs * for idx in range(n_outs): * if destroy_map[idx] != 0: # <<<<<<<<<<<<<< @@ -3133,7 +3133,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_destroy_map.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_destroy_map.diminfo[0].strides)) != 0) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":248 + /* "aesara/scan/scan_perform.pyx":248 * # ^ Case 1. Outputs should be computed inplace of their * # initial state * outs[idx][0] = args[ (1+ n_seqs + idx)] # <<<<<<<<<<<<<< @@ -3149,7 +3149,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":245 + /* "aesara/scan/scan_perform.pyx":245 * # 2.1 Create storage space for outputs * for idx in range(n_outs): * if destroy_map[idx] != 0: # <<<<<<<<<<<<<< @@ -3159,7 +3159,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L15; } - /* "theano/scan/scan_perform.pyx":249 + /* "aesara/scan/scan_perform.pyx":249 * # initial state * outs[idx][0] = args[ (1+ n_seqs + idx)] * elif ( outs[idx][0] is not None and # <<<<<<<<<<<<<< @@ -3180,7 +3180,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L16_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":250 + /* "aesara/scan/scan_perform.pyx":250 * outs[idx][0] = args[ (1+ n_seqs + idx)] * elif ( outs[idx][0] is not None and * outs[idx][0].shape[1:] == args[(1+ n_seqs + idx)].shape[1:] # <<<<<<<<<<<<<< @@ -3218,7 +3218,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L16_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":251 + /* "aesara/scan/scan_perform.pyx":251 * elif ( outs[idx][0] is not None and * outs[idx][0].shape[1:] == args[(1+ n_seqs + idx)].shape[1:] * and outs[idx][0].shape[0] >= store_steps[idx] ): # <<<<<<<<<<<<<< @@ -3246,7 +3246,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = __pyx_t_15; __pyx_L16_bool_binop_done:; - /* "theano/scan/scan_perform.pyx":249 + /* "aesara/scan/scan_perform.pyx":249 * # initial state * outs[idx][0] = args[ (1+ n_seqs + idx)] * elif ( outs[idx][0] is not None and # <<<<<<<<<<<<<< @@ -3255,7 +3255,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":253 + /* "aesara/scan/scan_perform.pyx":253 * and outs[idx][0].shape[0] >= 
store_steps[idx] ): * # Put in the values of the initial state * outs[idx][0] = outs[idx][0][:store_steps[idx]] # <<<<<<<<<<<<<< @@ -3276,7 +3276,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":254 + /* "aesara/scan/scan_perform.pyx":254 * # Put in the values of the initial state * outs[idx][0] = outs[idx][0][:store_steps[idx]] * if idx > n_mit_mot: # <<<<<<<<<<<<<< @@ -3286,7 +3286,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_idx > __pyx_v_n_mit_mot) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":255 + /* "aesara/scan/scan_perform.pyx":255 * outs[idx][0] = outs[idx][0][:store_steps[idx]] * if idx > n_mit_mot: * l = - mintaps[idx] # <<<<<<<<<<<<<< @@ -3296,7 +3296,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_12 = __pyx_v_idx; __pyx_v_l = (-(*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_mintaps.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_mintaps.diminfo[0].strides))); - /* "theano/scan/scan_perform.pyx":256 + /* "aesara/scan/scan_perform.pyx":256 * if idx > n_mit_mot: * l = - mintaps[idx] * outs[idx][0][:l] = args[(seqs_arg_offset + # <<<<<<<<<<<<<< @@ -3307,7 +3307,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_args, __pyx_t_8, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - /* "theano/scan/scan_perform.pyx":257 + /* "aesara/scan/scan_perform.pyx":257 * l = - mintaps[idx] * outs[idx][0][:l] = args[(seqs_arg_offset + * idx)][:l] # <<<<<<<<<<<<<< @@ -3318,7 +3318,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":256 + /* "aesara/scan/scan_perform.pyx":256 * if idx > n_mit_mot: * l = - mintaps[idx] * outs[idx][0][:l] = args[(seqs_arg_offset + # <<<<<<<<<<<<<< @@ -3334,7 +3334,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":254 + /* "aesara/scan/scan_perform.pyx":254 * # Put in the values of the initial state * outs[idx][0] = outs[idx][0][:store_steps[idx]] * if idx > n_mit_mot: # <<<<<<<<<<<<<< @@ -3344,7 +3344,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L19; } - /* "theano/scan/scan_perform.pyx":259 + /* "aesara/scan/scan_perform.pyx":259 * idx)][:l] * else: * outs[idx][0][:] = args[(seqs_arg_offset + idx)] # <<<<<<<<<<<<<< @@ -3366,7 +3366,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L19:; - /* "theano/scan/scan_perform.pyx":249 + /* "aesara/scan/scan_perform.pyx":249 * # initial state * outs[idx][0] = args[ (1+ n_seqs + idx)] * elif ( outs[idx][0] is not None and # <<<<<<<<<<<<<< @@ -3376,12 +3376,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L15; } - /* "theano/scan/scan_perform.pyx":261 + /* "aesara/scan/scan_perform.pyx":261 * outs[idx][0][:] = args[(seqs_arg_offset + idx)] * else: * outs[idx][0] = args[(seqs_arg_offset + idx)].copy() # <<<<<<<<<<<<<< - * - * + * + * */ 
/*else*/ { __pyx_t_8 = ((unsigned int)(__pyx_v_seqs_arg_offset + __pyx_v_idx)); @@ -3414,17 +3414,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L15:; } - /* "theano/scan/scan_perform.pyx":264 - * - * + /* "aesara/scan/scan_perform.pyx":264 + * + * * offset = nit_sot_arg_offset + n_nit_sot # <<<<<<<<<<<<<< * other_args = args[offset:] * input_storage = fnct.input_storage */ __pyx_v_offset = (__pyx_v_nit_sot_arg_offset + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":265 - * + /* "aesara/scan/scan_perform.pyx":265 + * * offset = nit_sot_arg_offset + n_nit_sot * other_args = args[offset:] # <<<<<<<<<<<<<< * input_storage = fnct.input_storage @@ -3435,7 +3435,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_other_args = __pyx_t_3; __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":266 + /* "aesara/scan/scan_perform.pyx":266 * offset = nit_sot_arg_offset + n_nit_sot * other_args = args[offset:] * input_storage = fnct.input_storage # <<<<<<<<<<<<<< @@ -3447,7 +3447,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_input_storage = __pyx_t_3; __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":267 + /* "aesara/scan/scan_perform.pyx":267 * other_args = args[offset:] * input_storage = fnct.input_storage * nb_mitmot_in = 0 # <<<<<<<<<<<<<< @@ -3457,7 +3457,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_int_0); __pyx_v_nb_mitmot_in = __pyx_int_0; - /* "theano/scan/scan_perform.pyx":268 + /* "aesara/scan/scan_perform.pyx":268 * input_storage = fnct.input_storage * nb_mitmot_in = 0 * for idx in range(n_mit_mot): # <<<<<<<<<<<<<< @@ -3469,7 +3469,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":269 + /* "aesara/scan/scan_perform.pyx":269 * nb_mitmot_in = 0 * for idx in range(n_mit_mot): * nb_mitmot_in += tap_array_len[idx] # <<<<<<<<<<<<<< @@ -3486,7 +3486,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_1 = 0; } - /* "theano/scan/scan_perform.pyx":270 + /* "aesara/scan/scan_perform.pyx":270 * for idx in range(n_mit_mot): * nb_mitmot_in += tap_array_len[idx] * old_mitmot_input_storage = [None] * nb_mitmot_in # <<<<<<<<<<<<<< @@ -3506,7 +3506,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_old_mitmot_input_storage = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":271 + /* "aesara/scan/scan_perform.pyx":271 * nb_mitmot_in += tap_array_len[idx] * old_mitmot_input_storage = [None] * nb_mitmot_in * old_mitmot_input_data = [None] * nb_mitmot_in # <<<<<<<<<<<<<< @@ -3526,7 +3526,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_old_mitmot_input_data = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":272 + /* "aesara/scan/scan_perform.pyx":272 * old_mitmot_input_storage = [None] * nb_mitmot_in * old_mitmot_input_data = [None] * nb_mitmot_in * output_storage = fnct.output_storage # <<<<<<<<<<<<<< @@ -3538,7 +3538,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_output_storage = __pyx_t_1; __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":273 + /* "aesara/scan/scan_perform.pyx":273 * old_mitmot_input_data = [None] * nb_mitmot_in * 
output_storage = fnct.output_storage * old_output_storage = [None] * len_output_storage # <<<<<<<<<<<<<< @@ -3557,7 +3557,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_old_output_storage = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":274 + /* "aesara/scan/scan_perform.pyx":274 * output_storage = fnct.output_storage * old_output_storage = [None] * len_output_storage * old_output_data = [None] * len_output_storage # <<<<<<<<<<<<<< @@ -3576,7 +3576,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_old_output_data = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":275 + /* "aesara/scan/scan_perform.pyx":275 * old_output_storage = [None] * len_output_storage * old_output_data = [None] * len_output_storage * offset = n_seqs # <<<<<<<<<<<<<< @@ -3585,7 +3585,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_offset = __pyx_v_n_seqs; - /* "theano/scan/scan_perform.pyx":276 + /* "aesara/scan/scan_perform.pyx":276 * old_output_data = [None] * len_output_storage * offset = n_seqs * for idx in range(n_outs): # <<<<<<<<<<<<<< @@ -3597,44 +3597,44 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":277 + /* "aesara/scan/scan_perform.pyx":277 * offset = n_seqs * for idx in range(n_outs): * offset += tap_array_len[idx] # <<<<<<<<<<<<<< * offset += n_shared_outs - * + * */ __pyx_t_12 = __pyx_v_idx; __pyx_v_offset = (__pyx_v_offset + (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_tap_array_len.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_tap_array_len.diminfo[0].strides))); } - /* "theano/scan/scan_perform.pyx":278 + /* "aesara/scan/scan_perform.pyx":278 * for idx in range(n_outs): * offset += tap_array_len[idx] * offset += n_shared_outs # <<<<<<<<<<<<<< - * + * * for idx in range(len(other_args)): */ __pyx_v_offset = (__pyx_v_offset + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":280 + /* "aesara/scan/scan_perform.pyx":280 * offset += n_shared_outs - * + * * for idx in range(len(other_args)): # <<<<<<<<<<<<<< * input_storage[(idx+offset)].storage[0] = other_args[idx] - * + * */ __pyx_t_16 = PyObject_Length(__pyx_v_other_args); if (unlikely(__pyx_t_16 == ((Py_ssize_t)-1))) __PYX_ERR(0, 280, __pyx_L1_error) __pyx_t_17 = __pyx_t_16; for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_17; __pyx_t_5+=1) { __pyx_v_idx = __pyx_t_5; - /* "theano/scan/scan_perform.pyx":281 - * + /* "aesara/scan/scan_perform.pyx":281 + * * for idx in range(len(other_args)): * input_storage[(idx+offset)].storage[0] = other_args[idx] # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_other_args, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 281, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); @@ -3649,17 +3649,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } - /* "theano/scan/scan_perform.pyx":284 - * - * + /* "aesara/scan/scan_perform.pyx":284 + * + * * i = 0 # <<<<<<<<<<<<<< * cond = 1 * ############## THE MAIN LOOP ######################### */ __pyx_v_i = 0; - /* "theano/scan/scan_perform.pyx":285 - * + /* "aesara/scan/scan_perform.pyx":285 + * * i = 0 * cond = 1 # <<<<<<<<<<<<<< * ############## THE MAIN 
LOOP ######################### @@ -3667,7 +3667,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_cond = 1; - /* "theano/scan/scan_perform.pyx":288 + /* "aesara/scan/scan_perform.pyx":288 * ############## THE MAIN LOOP ######################### * #for i in range(n_steps): * while (i < n_steps) and cond == 1: # <<<<<<<<<<<<<< @@ -3686,7 +3686,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L28_bool_binop_done:; if (!__pyx_t_4) break; - /* "theano/scan/scan_perform.pyx":291 + /* "aesara/scan/scan_perform.pyx":291 * # sequences over which scan iterates * # 3. collect input slices * for idx in range(n_seqs): # <<<<<<<<<<<<<< @@ -3698,7 +3698,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":292 + /* "aesara/scan/scan_perform.pyx":292 * # 3. collect input slices * for idx in range(n_seqs): * if vector_seqs[idx] == 1: # <<<<<<<<<<<<<< @@ -3709,7 +3709,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_vector_seqs.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_vector_seqs.diminfo[0].strides)) == 1) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":294 + /* "aesara/scan/scan_perform.pyx":294 * if vector_seqs[idx] == 1: * input_storage[idx].storage[0] = args[\ * (1+idx)][i:(i+1)].reshape(()) # <<<<<<<<<<<<<< @@ -3718,7 +3718,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_t_8 = ((unsigned int)(1 + __pyx_v_idx)); - /* "theano/scan/scan_perform.pyx":293 + /* "aesara/scan/scan_perform.pyx":293 * for idx in range(n_seqs): * if vector_seqs[idx] == 1: * input_storage[idx].storage[0] = args[\ # <<<<<<<<<<<<<< @@ -3728,7 +3728,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_args, __pyx_t_8, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - /* "theano/scan/scan_perform.pyx":294 + /* "aesara/scan/scan_perform.pyx":294 * if vector_seqs[idx] == 1: * input_storage[idx].storage[0] = args[\ * (1+idx)][i:(i+1)].reshape(()) # <<<<<<<<<<<<<< @@ -3757,7 +3757,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":293 + /* "aesara/scan/scan_perform.pyx":293 * for idx in range(n_seqs): * if vector_seqs[idx] == 1: * input_storage[idx].storage[0] = args[\ # <<<<<<<<<<<<<< @@ -3773,7 +3773,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":292 + /* "aesara/scan/scan_perform.pyx":292 * # 3. 
collect input slices * for idx in range(n_seqs): * if vector_seqs[idx] == 1: # <<<<<<<<<<<<<< @@ -3783,11 +3783,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L32; } - /* "theano/scan/scan_perform.pyx":297 + /* "aesara/scan/scan_perform.pyx":297 * else: * input_storage[idx].storage[0] = \ * args[(idx+1)][i] # <<<<<<<<<<<<<< - * + * * offset = n_seqs */ /*else*/ { @@ -3798,12 +3798,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":296 + /* "aesara/scan/scan_perform.pyx":296 * (1+idx)][i:(i+1)].reshape(()) * else: * input_storage[idx].storage[0] = \ # <<<<<<<<<<<<<< * args[(idx+1)][i] - * + * */ __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_input_storage, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); @@ -3817,17 +3817,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L32:; } - /* "theano/scan/scan_perform.pyx":299 + /* "aesara/scan/scan_perform.pyx":299 * args[(idx+1)][i] - * + * * offset = n_seqs # <<<<<<<<<<<<<< * for idx in range(n_outs): * if vector_outs[idx] == 1: */ __pyx_v_offset = __pyx_v_n_seqs; - /* "theano/scan/scan_perform.pyx":300 - * + /* "aesara/scan/scan_perform.pyx":300 + * * offset = n_seqs * for idx in range(n_outs): # <<<<<<<<<<<<<< * if vector_outs[idx] == 1: @@ -3838,7 +3838,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":301 + /* "aesara/scan/scan_perform.pyx":301 * offset = n_seqs * for idx in range(n_outs): * if vector_outs[idx] == 1: # <<<<<<<<<<<<<< @@ -3849,7 +3849,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_vector_outs.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_vector_outs.diminfo[0].strides)) == 1) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":302 + /* "aesara/scan/scan_perform.pyx":302 * for idx in range(n_outs): * if vector_outs[idx] == 1: * for tdx in range(tap_array_len[idx]): # <<<<<<<<<<<<<< @@ -3862,7 +3862,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_18; __pyx_t_8+=1) { __pyx_v_tdx = __pyx_t_8; - /* "theano/scan/scan_perform.pyx":303 + /* "aesara/scan/scan_perform.pyx":303 * if vector_outs[idx] == 1: * for tdx in range(tap_array_len[idx]): * tap = tap_array[idx,tdx] # <<<<<<<<<<<<<< @@ -3873,7 +3873,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_19 = __pyx_v_tdx; __pyx_v_tap = (*__Pyx_BufPtrStrided2d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_tap_array.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_tap_array.diminfo[0].strides, __pyx_t_19, __pyx_pybuffernd_tap_array.diminfo[1].strides)); - /* "theano/scan/scan_perform.pyx":304 + /* "aesara/scan/scan_perform.pyx":304 * for tdx in range(tap_array_len[idx]): * tap = tap_array[idx,tdx] * _idx = (pos[idx]+tap)%store_steps[idx] # <<<<<<<<<<<<<< @@ -3887,7 +3887,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_v__idx = __Pyx_mod_int(__pyx_t_11, (__pyx_v_store_steps[__pyx_v_idx])); - /* 
"theano/scan/scan_perform.pyx":306 + /* "aesara/scan/scan_perform.pyx":306 * _idx = (pos[idx]+tap)%store_steps[idx] * input_storage[offset].storage[0] =\ * outs[idx][0][_idx:(_idx+1)].reshape(()) # <<<<<<<<<<<<<< @@ -3921,7 +3921,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":305 + /* "aesara/scan/scan_perform.pyx":305 * tap = tap_array[idx,tdx] * _idx = (pos[idx]+tap)%store_steps[idx] * input_storage[offset].storage[0] =\ # <<<<<<<<<<<<<< @@ -3937,7 +3937,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":307 + /* "aesara/scan/scan_perform.pyx":307 * input_storage[offset].storage[0] =\ * outs[idx][0][_idx:(_idx+1)].reshape(()) * offset += 1 # <<<<<<<<<<<<<< @@ -3947,7 +3947,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_offset = (__pyx_v_offset + 1); } - /* "theano/scan/scan_perform.pyx":301 + /* "aesara/scan/scan_perform.pyx":301 * offset = n_seqs * for idx in range(n_outs): * if vector_outs[idx] == 1: # <<<<<<<<<<<<<< @@ -3957,7 +3957,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L35; } - /* "theano/scan/scan_perform.pyx":309 + /* "aesara/scan/scan_perform.pyx":309 * offset += 1 * else: * for tdx in range(tap_array_len[idx]): # <<<<<<<<<<<<<< @@ -3971,7 +3971,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_18; __pyx_t_8+=1) { __pyx_v_tdx = __pyx_t_8; - /* "theano/scan/scan_perform.pyx":310 + /* "aesara/scan/scan_perform.pyx":310 * else: * for tdx in range(tap_array_len[idx]): * tap = tap_array[idx,tdx] # <<<<<<<<<<<<<< @@ -3982,7 +3982,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_12 = __pyx_v_tdx; __pyx_v_tap = (*__Pyx_BufPtrStrided2d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_tap_array.rcbuffer->pybuffer.buf, __pyx_t_19, __pyx_pybuffernd_tap_array.diminfo[0].strides, __pyx_t_12, __pyx_pybuffernd_tap_array.diminfo[1].strides)); - /* "theano/scan/scan_perform.pyx":311 + /* "aesara/scan/scan_perform.pyx":311 * for tdx in range(tap_array_len[idx]): * tap = tap_array[idx,tdx] * _idx = (pos[idx]+tap)%store_steps[idx] # <<<<<<<<<<<<<< @@ -3996,12 +3996,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_v__idx = __Pyx_mod_int(__pyx_t_11, (__pyx_v_store_steps[__pyx_v_idx])); - /* "theano/scan/scan_perform.pyx":312 + /* "aesara/scan/scan_perform.pyx":312 * tap = tap_array[idx,tdx] * _idx = (pos[idx]+tap)%store_steps[idx] * input_storage[offset].storage[0] = outs[idx][0][_idx] # <<<<<<<<<<<<<< * offset += 1 - * + * */ __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); @@ -4020,12 +4020,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":313 + /* "aesara/scan/scan_perform.pyx":313 * _idx = (pos[idx]+tap)%store_steps[idx] * input_storage[offset].storage[0] = outs[idx][0][_idx] * offset += 1 # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_v_offset = 
(__pyx_v_offset + 1); } @@ -4033,17 +4033,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L35:; } - /* "theano/scan/scan_perform.pyx":316 - * - * + /* "aesara/scan/scan_perform.pyx":316 + * + * * a_offset = shared_arg_offset # <<<<<<<<<<<<<< * o_offset = n_outs + n_nit_sot * if i == 0: */ __pyx_v_a_offset = __pyx_v_shared_arg_offset; - /* "theano/scan/scan_perform.pyx":317 - * + /* "aesara/scan/scan_perform.pyx":317 + * * a_offset = shared_arg_offset * o_offset = n_outs + n_nit_sot # <<<<<<<<<<<<<< * if i == 0: @@ -4051,7 +4051,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_o_offset = (__pyx_v_n_outs + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":318 + /* "aesara/scan/scan_perform.pyx":318 * a_offset = shared_arg_offset * o_offset = n_outs + n_nit_sot * if i == 0: # <<<<<<<<<<<<<< @@ -4061,7 +4061,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_i == 0) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":319 + /* "aesara/scan/scan_perform.pyx":319 * o_offset = n_outs + n_nit_sot * if i == 0: * for j in range(n_shared_outs): # <<<<<<<<<<<<<< @@ -4073,7 +4073,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_j = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":320 + /* "aesara/scan/scan_perform.pyx":320 * if i == 0: * for j in range(n_shared_outs): * input_storage[offset].storage[0] = args[(a_offset+j)] # <<<<<<<<<<<<<< @@ -4092,7 +4092,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":321 + /* "aesara/scan/scan_perform.pyx":321 * for j in range(n_shared_outs): * input_storage[offset].storage[0] = args[(a_offset+j)] * offset += 1 # <<<<<<<<<<<<<< @@ -4102,7 +4102,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_offset = (__pyx_v_offset + 1); } - /* "theano/scan/scan_perform.pyx":318 + /* "aesara/scan/scan_perform.pyx":318 * a_offset = shared_arg_offset * o_offset = n_outs + n_nit_sot * if i == 0: # <<<<<<<<<<<<<< @@ -4112,7 +4112,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L40; } - /* "theano/scan/scan_perform.pyx":323 + /* "aesara/scan/scan_perform.pyx":323 * offset += 1 * else: * for j in range(n_shared_outs): # <<<<<<<<<<<<<< @@ -4125,12 +4125,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_j = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":324 + /* "aesara/scan/scan_perform.pyx":324 * else: * for j in range(n_shared_outs): * input_storage[offset].storage[0] = outs[(o_offset+j)][0] # <<<<<<<<<<<<<< * offset += 1 - * + * */ __pyx_t_8 = ((unsigned int)(__pyx_v_o_offset + __pyx_v_j)); __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_t_8, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 324, __pyx_L1_error) @@ -4147,11 +4147,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":325 + /* "aesara/scan/scan_perform.pyx":325 * for j in range(n_shared_outs): * 
input_storage[offset].storage[0] = outs[(o_offset+j)][0] * offset += 1 # <<<<<<<<<<<<<< - * + * * # 4. collecting slices where the output should be stored */ __pyx_v_offset = (__pyx_v_offset + 1); @@ -4159,8 +4159,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L40:; - /* "theano/scan/scan_perform.pyx":330 - * + /* "aesara/scan/scan_perform.pyx":330 + * * # 4.1. Collect slices for mitmots * offset = 0 # <<<<<<<<<<<<<< * for idx in range(n_mit_mot_outs): @@ -4168,7 +4168,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_offset = 0; - /* "theano/scan/scan_perform.pyx":331 + /* "aesara/scan/scan_perform.pyx":331 * # 4.1. Collect slices for mitmots * offset = 0 * for idx in range(n_mit_mot_outs): # <<<<<<<<<<<<<< @@ -4180,7 +4180,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":332 + /* "aesara/scan/scan_perform.pyx":332 * offset = 0 * for idx in range(n_mit_mot_outs): * if not mitmots_preallocated[idx]: # <<<<<<<<<<<<<< @@ -4191,12 +4191,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((!((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_mitmots_preallocated.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_mitmots_preallocated.diminfo[0].strides)) != 0)) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":333 + /* "aesara/scan/scan_perform.pyx":333 * for idx in range(n_mit_mot_outs): * if not mitmots_preallocated[idx]: * output_storage[offset].storage[0] = None # <<<<<<<<<<<<<< * offset += 1 - * + * */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_output_storage, ((unsigned int)__pyx_v_offset), unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); @@ -4206,16 +4206,16 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_t_1, 0, Py_None, long, 1, __Pyx_PyInt_From_long, 0, 0, 0) < 0)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":334 + /* "aesara/scan/scan_perform.pyx":334 * if not mitmots_preallocated[idx]: * output_storage[offset].storage[0] = None * offset += 1 # <<<<<<<<<<<<<< - * + * * # 4.2. Collect slices for mitsots, sitsots and nitsots */ __pyx_v_offset = (__pyx_v_offset + 1); - /* "theano/scan/scan_perform.pyx":332 + /* "aesara/scan/scan_perform.pyx":332 * offset = 0 * for idx in range(n_mit_mot_outs): * if not mitmots_preallocated[idx]: # <<<<<<<<<<<<<< @@ -4225,8 +4225,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } } - /* "theano/scan/scan_perform.pyx":337 - * + /* "aesara/scan/scan_perform.pyx":337 + * * # 4.2. Collect slices for mitsots, sitsots and nitsots * if i != 0: # <<<<<<<<<<<<<< * for idx in range(n_outs + n_nit_sot - n_mit_mot): @@ -4235,7 +4235,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_i != 0) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":338 + /* "aesara/scan/scan_perform.pyx":338 * # 4.2. 
Collect slices for mitsots, sitsots and nitsots * if i != 0: * for idx in range(n_outs + n_nit_sot - n_mit_mot): # <<<<<<<<<<<<<< @@ -4247,7 +4247,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":339 + /* "aesara/scan/scan_perform.pyx":339 * if i != 0: * for idx in range(n_outs + n_nit_sot - n_mit_mot): * if ( store_steps[(idx+n_mit_mot)] == 1 or # <<<<<<<<<<<<<< @@ -4261,7 +4261,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L52_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":340 + /* "aesara/scan/scan_perform.pyx":340 * for idx in range(n_outs + n_nit_sot - n_mit_mot): * if ( store_steps[(idx+n_mit_mot)] == 1 or * vector_outs[(idx+n_mit_mot)] == 1): # <<<<<<<<<<<<<< @@ -4273,7 +4273,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = __pyx_t_15; __pyx_L52_bool_binop_done:; - /* "theano/scan/scan_perform.pyx":339 + /* "aesara/scan/scan_perform.pyx":339 * if i != 0: * for idx in range(n_outs + n_nit_sot - n_mit_mot): * if ( store_steps[(idx+n_mit_mot)] == 1 or # <<<<<<<<<<<<<< @@ -4282,7 +4282,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":341 + /* "aesara/scan/scan_perform.pyx":341 * if ( store_steps[(idx+n_mit_mot)] == 1 or * vector_outs[(idx+n_mit_mot)] == 1): * output_storage[(idx+offset)].storage[0] = None # <<<<<<<<<<<<<< @@ -4298,7 +4298,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_t_10, 0, Py_None, long, 1, __Pyx_PyInt_From_long, 0, 0, 0) < 0)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":339 + /* "aesara/scan/scan_perform.pyx":339 * if i != 0: * for idx in range(n_outs + n_nit_sot - n_mit_mot): * if ( store_steps[(idx+n_mit_mot)] == 1 or # <<<<<<<<<<<<<< @@ -4308,7 +4308,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L51; } - /* "theano/scan/scan_perform.pyx":344 + /* "aesara/scan/scan_perform.pyx":344 * else: * output_storage[(idx+offset)].storage[0] =\ * outs[(idx+n_mit_mot)][0][pos[\ # <<<<<<<<<<<<<< @@ -4323,7 +4323,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":345 + /* "aesara/scan/scan_perform.pyx":345 * output_storage[(idx+offset)].storage[0] =\ * outs[(idx+n_mit_mot)][0][pos[\ * (idx+n_mit_mot)]] # <<<<<<<<<<<<<< @@ -4332,7 +4332,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_t_11 = (__pyx_v_pos[((unsigned int)(__pyx_v_idx + __pyx_v_n_mit_mot))]); - /* "theano/scan/scan_perform.pyx":344 + /* "aesara/scan/scan_perform.pyx":344 * else: * output_storage[(idx+offset)].storage[0] =\ * outs[(idx+n_mit_mot)][0][pos[\ # <<<<<<<<<<<<<< @@ -4343,7 +4343,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":343 + /* "aesara/scan/scan_perform.pyx":343 * output_storage[(idx+offset)].storage[0] = None * else: * output_storage[(idx+offset)].storage[0] =\ # <<<<<<<<<<<<<< @@ -4363,8 +4363,8 @@ static PyObject 
*__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L51:; } - /* "theano/scan/scan_perform.pyx":337 - * + /* "aesara/scan/scan_perform.pyx":337 + * * # 4.2. Collect slices for mitsots, sitsots and nitsots * if i != 0: # <<<<<<<<<<<<<< * for idx in range(n_outs + n_nit_sot - n_mit_mot): @@ -4373,12 +4373,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L48; } - /* "theano/scan/scan_perform.pyx":347 + /* "aesara/scan/scan_perform.pyx":347 * (idx+n_mit_mot)]] * else: * for idx in range(n_outs + n_nit_sot - n_mit_mot): # <<<<<<<<<<<<<< * output_storage[(idx+offset)].storage[0] = None - * + * */ /*else*/ { __pyx_t_5 = ((__pyx_v_n_outs + __pyx_v_n_nit_sot) - __pyx_v_n_mit_mot); @@ -4386,11 +4386,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":348 + /* "aesara/scan/scan_perform.pyx":348 * else: * for idx in range(n_outs + n_nit_sot - n_mit_mot): * output_storage[(idx+offset)].storage[0] = None # <<<<<<<<<<<<<< - * + * * # 4.3. Collect slices for shared outputs */ __pyx_t_8 = ((unsigned int)(__pyx_v_idx + __pyx_v_offset)); @@ -4405,8 +4405,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L48:; - /* "theano/scan/scan_perform.pyx":351 - * + /* "aesara/scan/scan_perform.pyx":351 + * * # 4.3. Collect slices for shared outputs * offset += n_outs+n_nit_sot - n_mit_mot # <<<<<<<<<<<<<< * for idx in range(n_shared_outs): @@ -4414,23 +4414,23 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_offset = (__pyx_v_offset + ((__pyx_v_n_outs + __pyx_v_n_nit_sot) - __pyx_v_n_mit_mot)); - /* "theano/scan/scan_perform.pyx":352 + /* "aesara/scan/scan_perform.pyx":352 * # 4.3. Collect slices for shared outputs * offset += n_outs+n_nit_sot - n_mit_mot * for idx in range(n_shared_outs): # <<<<<<<<<<<<<< * output_storage[(idx+offset)].storage[0] = None - * + * */ __pyx_t_5 = __pyx_v_n_shared_outs; __pyx_t_6 = __pyx_t_5; for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":353 + /* "aesara/scan/scan_perform.pyx":353 * offset += n_outs+n_nit_sot - n_mit_mot * for idx in range(n_shared_outs): * output_storage[(idx+offset)].storage[0] = None # <<<<<<<<<<<<<< - * + * * # 4.4. If there is a condition add it to the mix */ __pyx_t_8 = ((unsigned int)(__pyx_v_idx + __pyx_v_offset)); @@ -4443,8 +4443,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; } - /* "theano/scan/scan_perform.pyx":356 - * + /* "aesara/scan/scan_perform.pyx":356 + * * # 4.4. If there is a condition add it to the mix * if as_while: # <<<<<<<<<<<<<< * pdx = offset + n_shared_outs @@ -4453,20 +4453,20 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_v_as_while != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":357 + /* "aesara/scan/scan_perform.pyx":357 * # 4.4. 
If there is a condition add it to the mix * if as_while: * pdx = offset + n_shared_outs # <<<<<<<<<<<<<< * output_storage[pdx].storage[0] = None - * + * */ __pyx_v_pdx = (__pyx_v_offset + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":358 + /* "aesara/scan/scan_perform.pyx":358 * if as_while: * pdx = offset + n_shared_outs * output_storage[pdx].storage[0] = None # <<<<<<<<<<<<<< - * + * * # 4.5. Keep a reference to the variables (ndarrays, GpuArrays, */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_output_storage, ((unsigned int)__pyx_v_pdx), unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 358, __pyx_L1_error) @@ -4477,8 +4477,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_t_3, 0, Py_None, long, 1, __Pyx_PyInt_From_long, 0, 0, 0) < 0)) __PYX_ERR(0, 358, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":356 - * + /* "aesara/scan/scan_perform.pyx":356 + * * # 4.4. If there is a condition add it to the mix * if as_while: # <<<<<<<<<<<<<< * pdx = offset + n_shared_outs @@ -4486,11 +4486,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":366 + /* "aesara/scan/scan_perform.pyx":366 * # cases where outputs reused the allocated object but alter the * # memory region they refer to. * for idx in range(len_output_storage): # <<<<<<<<<<<<<< - * + * * var = output_storage[idx].storage[0] */ __pyx_t_5 = __pyx_v_len_output_storage; @@ -4498,12 +4498,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":368 + /* "aesara/scan/scan_perform.pyx":368 * for idx in range(len_output_storage): - * + * * var = output_storage[idx].storage[0] # <<<<<<<<<<<<<< * old_output_storage[idx] = var - * + * */ __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_output_storage, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 368, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); @@ -4516,18 +4516,18 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_var, __pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":369 - * + /* "aesara/scan/scan_perform.pyx":369 + * * var = output_storage[idx].storage[0] * old_output_storage[idx] = var # <<<<<<<<<<<<<< - * + * * if var is None: */ if (unlikely(__Pyx_SetItemInt(__pyx_v_old_output_storage, __pyx_v_idx, __pyx_v_var, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) __PYX_ERR(0, 369, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":371 + /* "aesara/scan/scan_perform.pyx":371 * old_output_storage[idx] = var - * + * * if var is None: # <<<<<<<<<<<<<< * old_output_data[idx] = None * elif outs_is_tensor[idx]: @@ -4536,8 +4536,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = (__pyx_t_4 != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":372 - * + /* "aesara/scan/scan_perform.pyx":372 + * * if var is None: * old_output_data[idx] = None # <<<<<<<<<<<<<< * elif outs_is_tensor[idx]: @@ -4545,9 +4545,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (unlikely(__Pyx_SetItemInt(__pyx_v_old_output_data, __pyx_v_idx, Py_None, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) 
__PYX_ERR(0, 372, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":371 + /* "aesara/scan/scan_perform.pyx":371 * old_output_storage[idx] = var - * + * * if var is None: # <<<<<<<<<<<<<< * old_output_data[idx] = None * elif outs_is_tensor[idx]: @@ -4555,7 +4555,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L61; } - /* "theano/scan/scan_perform.pyx":373 + /* "aesara/scan/scan_perform.pyx":373 * if var is None: * old_output_data[idx] = None * elif outs_is_tensor[idx]: # <<<<<<<<<<<<<< @@ -4566,7 +4566,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_outs_is_tensor.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_outs_is_tensor.diminfo[0].strides)) != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":374 + /* "aesara/scan/scan_perform.pyx":374 * old_output_data[idx] = None * elif outs_is_tensor[idx]: * old_output_data[idx] = var.data # <<<<<<<<<<<<<< @@ -4578,7 +4578,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_v_old_output_data, __pyx_v_idx, __pyx_t_3, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) __PYX_ERR(0, 374, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":373 + /* "aesara/scan/scan_perform.pyx":373 * if var is None: * old_output_data[idx] = None * elif outs_is_tensor[idx]: # <<<<<<<<<<<<<< @@ -4588,11 +4588,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L61; } - /* "theano/scan/scan_perform.pyx":376 + /* "aesara/scan/scan_perform.pyx":376 * old_output_data[idx] = var.data * else: * old_output_data[idx] = var.gpudata # <<<<<<<<<<<<<< - * + * * # 4.6. Keep a reference to the variables (ndarrays, GpuArrays, */ /*else*/ { @@ -4604,7 +4604,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L61:; } - /* "theano/scan/scan_perform.pyx":384 + /* "aesara/scan/scan_perform.pyx":384 * # be able to detect cases where outputs reused the allocated object * # but alter the memory region they refer to. * for idx in xrange(nb_mitmot_in): # <<<<<<<<<<<<<< @@ -4616,12 +4616,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_20; __pyx_t_5+=1) { __pyx_v_idx = __pyx_t_5; - /* "theano/scan/scan_perform.pyx":385 + /* "aesara/scan/scan_perform.pyx":385 * # but alter the memory region they refer to. 
* for idx in xrange(nb_mitmot_in): * var = input_storage[idx + n_seqs].storage[0] # <<<<<<<<<<<<<< * old_mitmot_input_storage[idx] = var - * + * */ __pyx_t_6 = (__pyx_v_idx + __pyx_v_n_seqs); __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_input_storage, __pyx_t_6, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) @@ -4635,18 +4635,18 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_var, __pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":386 + /* "aesara/scan/scan_perform.pyx":386 * for idx in xrange(nb_mitmot_in): * var = input_storage[idx + n_seqs].storage[0] * old_mitmot_input_storage[idx] = var # <<<<<<<<<<<<<< - * + * * if var is None: */ if (unlikely(__Pyx_SetItemInt(__pyx_v_old_mitmot_input_storage, __pyx_v_idx, __pyx_v_var, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) __PYX_ERR(0, 386, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":388 + /* "aesara/scan/scan_perform.pyx":388 * old_mitmot_input_storage[idx] = var - * + * * if var is None: # <<<<<<<<<<<<<< * old_mitmot_input_data[idx] = None * elif inps_is_tensor[idx + n_seqs]: @@ -4655,8 +4655,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_15 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":389 - * + /* "aesara/scan/scan_perform.pyx":389 + * * if var is None: * old_mitmot_input_data[idx] = None # <<<<<<<<<<<<<< * elif inps_is_tensor[idx + n_seqs]: @@ -4664,9 +4664,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (unlikely(__Pyx_SetItemInt(__pyx_v_old_mitmot_input_data, __pyx_v_idx, Py_None, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) __PYX_ERR(0, 389, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":388 + /* "aesara/scan/scan_perform.pyx":388 * old_mitmot_input_storage[idx] = var - * + * * if var is None: # <<<<<<<<<<<<<< * old_mitmot_input_data[idx] = None * elif inps_is_tensor[idx + n_seqs]: @@ -4674,7 +4674,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L64; } - /* "theano/scan/scan_perform.pyx":390 + /* "aesara/scan/scan_perform.pyx":390 * if var is None: * old_mitmot_input_data[idx] = None * elif inps_is_tensor[idx + n_seqs]: # <<<<<<<<<<<<<< @@ -4685,7 +4685,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_inps_is_tensor.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_inps_is_tensor.diminfo[0].strides)) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":391 + /* "aesara/scan/scan_perform.pyx":391 * old_mitmot_input_data[idx] = None * elif inps_is_tensor[idx + n_seqs]: * old_mitmot_input_data[idx] = var.data # <<<<<<<<<<<<<< @@ -4697,7 +4697,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_v_old_mitmot_input_data, __pyx_v_idx, __pyx_t_3, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 1, 0, 0) < 0)) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":390 + /* "aesara/scan/scan_perform.pyx":390 * if var is None: * old_mitmot_input_data[idx] = None * elif inps_is_tensor[idx + n_seqs]: # <<<<<<<<<<<<<< @@ -4707,11 +4707,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L64; } - /* 
"theano/scan/scan_perform.pyx":393 + /* "aesara/scan/scan_perform.pyx":393 * old_mitmot_input_data[idx] = var.data * else: * old_mitmot_input_data[idx] = var.gpudata # <<<<<<<<<<<<<< - * + * * # 5.1 compute outputs */ /*else*/ { @@ -4723,11 +4723,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L64:; } - /* "theano/scan/scan_perform.pyx":396 - * + /* "aesara/scan/scan_perform.pyx":396 + * * # 5.1 compute outputs * t0_fn = time.time() # <<<<<<<<<<<<<< - * + * * try: */ __Pyx_GetModuleGlobalName(__pyx_t_10, __pyx_n_s_time); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 396, __pyx_L1_error) @@ -4753,9 +4753,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_t0_fn, __pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":398 + /* "aesara/scan/scan_perform.pyx":398 * t0_fn = time.time() - * + * * try: # <<<<<<<<<<<<<< * fn() * except Exception: @@ -4769,8 +4769,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XGOTREF(__pyx_t_23); /*try:*/ { - /* "theano/scan/scan_perform.pyx":399 - * + /* "aesara/scan/scan_perform.pyx":399 + * * try: * fn() # <<<<<<<<<<<<<< * except Exception: @@ -4794,9 +4794,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":398 + /* "aesara/scan/scan_perform.pyx":398 * t0_fn = time.time() - * + * * try: # <<<<<<<<<<<<<< * fn() * except Exception: @@ -4812,7 +4812,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":400 + /* "aesara/scan/scan_perform.pyx":400 * try: * fn() * except Exception: # <<<<<<<<<<<<<< @@ -4821,13 +4821,13 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_t_11 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); if (__pyx_t_11) { - __Pyx_AddTraceback("theano.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("aesara.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_1, &__pyx_t_10) < 0) __PYX_ERR(0, 400, __pyx_L67_except_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GOTREF(__pyx_t_1); __Pyx_GOTREF(__pyx_t_10); - /* "theano/scan/scan_perform.pyx":401 + /* "aesara/scan/scan_perform.pyx":401 * fn() * except Exception: * if hasattr(fn, 'position_of_error'): # <<<<<<<<<<<<<< @@ -4838,7 +4838,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = (__pyx_t_4 != 0); if (likely(__pyx_t_15)) { - /* "theano/scan/scan_perform.pyx":405 + /* "aesara/scan/scan_perform.pyx":405 * # the C VM needs this because the exception manipulation * # done by raise_with_op is not implemented in C. 
* if hasattr(fn, 'thunks'): # <<<<<<<<<<<<<< @@ -4849,7 +4849,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_15 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":407 + /* "aesara/scan/scan_perform.pyx":407 * if hasattr(fn, 'thunks'): * # For the CVM * raise_with_op(fn.maker.fgraph, # <<<<<<<<<<<<<< @@ -4864,7 +4864,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_26); __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; - /* "theano/scan/scan_perform.pyx":408 + /* "aesara/scan/scan_perform.pyx":408 * # For the CVM * raise_with_op(fn.maker.fgraph, * fn.nodes[fn.position_of_error], # <<<<<<<<<<<<<< @@ -4880,7 +4880,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; __Pyx_DECREF(__pyx_t_27); __pyx_t_27 = 0; - /* "theano/scan/scan_perform.pyx":409 + /* "aesara/scan/scan_perform.pyx":409 * raise_with_op(fn.maker.fgraph, * fn.nodes[fn.position_of_error], * fn.thunks[fn.position_of_error]) # <<<<<<<<<<<<<< @@ -4951,7 +4951,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_24); __pyx_t_24 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":405 + /* "aesara/scan/scan_perform.pyx":405 * # the C VM needs this because the exception manipulation * # done by raise_with_op is not implemented in C. * if hasattr(fn, 'thunks'): # <<<<<<<<<<<<<< @@ -4961,7 +4961,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L76; } - /* "theano/scan/scan_perform.pyx":415 + /* "aesara/scan/scan_perform.pyx":415 * # temps values So for now, we just don't print * # the extra shapes/strides info * raise_with_op(fn.maker.fgraph, fn.nodes[fn.position_of_error]) # <<<<<<<<<<<<<< @@ -5037,7 +5037,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L76:; - /* "theano/scan/scan_perform.pyx":401 + /* "aesara/scan/scan_perform.pyx":401 * fn() * except Exception: * if hasattr(fn, 'position_of_error'): # <<<<<<<<<<<<<< @@ -5047,11 +5047,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L75; } - /* "theano/scan/scan_perform.pyx":418 + /* "aesara/scan/scan_perform.pyx":418 * else: * # old-style linkers raise their own exceptions * raise # <<<<<<<<<<<<<< - * + * * dt_fn = time.time() - t0_fn */ /*else*/ { @@ -5059,7 +5059,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GIVEREF(__pyx_t_1); __Pyx_XGIVEREF(__pyx_t_10); __Pyx_ErrRestoreWithState(__pyx_t_3, __pyx_t_1, __pyx_t_10); - __pyx_t_3 = 0; __pyx_t_1 = 0; __pyx_t_10 = 0; + __pyx_t_3 = 0; __pyx_t_1 = 0; __pyx_t_10 = 0; __PYX_ERR(0, 418, __pyx_L67_except_error) } __pyx_L75:; @@ -5071,9 +5071,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L67_except_error; __pyx_L67_except_error:; - /* "theano/scan/scan_perform.pyx":398 + /* "aesara/scan/scan_perform.pyx":398 * t0_fn = time.time() - * + * * try: # <<<<<<<<<<<<<< * fn() * except Exception: @@ -5091,9 +5091,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L72_try_end:; } - /* "theano/scan/scan_perform.pyx":420 + /* "aesara/scan/scan_perform.pyx":420 * raise - * + * * dt_fn = time.time() - t0_fn # <<<<<<<<<<<<<< * t_fn += dt_fn * if self.as_while: @@ -5124,8 +5124,8 @@ static PyObject 
*__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_dt_fn, __pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":421 - * + /* "aesara/scan/scan_perform.pyx":421 + * * dt_fn = time.time() - t0_fn * t_fn += dt_fn # <<<<<<<<<<<<<< * if self.as_while: @@ -5136,7 +5136,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF_SET(__pyx_v_t_fn, __pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":422 + /* "aesara/scan/scan_perform.pyx":422 * dt_fn = time.time() - t0_fn * t_fn += dt_fn * if self.as_while: # <<<<<<<<<<<<<< @@ -5149,21 +5149,21 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":423 + /* "aesara/scan/scan_perform.pyx":423 * t_fn += dt_fn * if self.as_while: * pdx = offset + n_shared_outs # <<<<<<<<<<<<<< * cond = output_storage[pdx].storage[0] == 0 - * + * */ __pyx_v_pdx = (__pyx_v_offset + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":424 + /* "aesara/scan/scan_perform.pyx":424 * if self.as_while: * pdx = offset + n_shared_outs * cond = output_storage[pdx].storage[0] == 0 # <<<<<<<<<<<<<< - * - * # 5.2. By calling fn() directly instead of calling the theano + * + * # 5.2. By calling fn() directly instead of calling the aesara */ __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_output_storage, __pyx_v_pdx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); @@ -5180,7 +5180,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_cond = __pyx_t_11; - /* "theano/scan/scan_perform.pyx":422 + /* "aesara/scan/scan_perform.pyx":422 * dt_fn = time.time() - t0_fn * t_fn += dt_fn * if self.as_while: # <<<<<<<<<<<<<< @@ -5189,7 +5189,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":429 + /* "aesara/scan/scan_perform.pyx":429 * # function, it is possible that the updates have not been * # performed. Perform the updates if needed. * offset_out = len(output_storage) - 1 # <<<<<<<<<<<<<< @@ -5199,7 +5199,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_16 = PyObject_Length(__pyx_v_output_storage); if (unlikely(__pyx_t_16 == ((Py_ssize_t)-1))) __PYX_ERR(0, 429, __pyx_L1_error) __pyx_v_offset_out = (__pyx_t_16 - 1); - /* "theano/scan/scan_perform.pyx":430 + /* "aesara/scan/scan_perform.pyx":430 * # performed. Perform the updates if needed. 
* offset_out = len(output_storage) - 1 * if getattr(fn, 'need_update_inputs', True): # <<<<<<<<<<<<<< @@ -5212,7 +5212,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":432 + /* "aesara/scan/scan_perform.pyx":432 * if getattr(fn, 'need_update_inputs', True): * # Update the inputs that have an update function * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], # <<<<<<<<<<<<<< @@ -5231,7 +5231,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":433 + /* "aesara/scan/scan_perform.pyx":433 * # Update the inputs that have an update function * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], * self.fn.input_storage[::-1]): # <<<<<<<<<<<<<< @@ -5247,7 +5247,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":432 + /* "aesara/scan/scan_perform.pyx":432 * if getattr(fn, 'need_update_inputs', True): * # Update the inputs that have an update function * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], # <<<<<<<<<<<<<< @@ -5315,11 +5315,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { - __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_2 = PyTuple_GET_ITEM(sequence, 1); + __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_2 = PyTuple_GET_ITEM(sequence, 1); } else { - __pyx_t_3 = PyList_GET_ITEM(sequence, 0); - __pyx_t_2 = PyList_GET_ITEM(sequence, 1); + __pyx_t_3 = PyList_GET_ITEM(sequence, 0); + __pyx_t_2 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); @@ -5356,7 +5356,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_storage, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":434 + /* "aesara/scan/scan_perform.pyx":434 * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], * self.fn.input_storage[::-1]): * if inp.update is not None: # <<<<<<<<<<<<<< @@ -5370,12 +5370,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = (__pyx_t_4 != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":435 + /* "aesara/scan/scan_perform.pyx":435 * self.fn.input_storage[::-1]): * if inp.update is not None: * storage.data = output_storage[offset_out].data # <<<<<<<<<<<<<< * offset_out -= 1 - * + * */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_output_storage, __pyx_v_offset_out, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); @@ -5385,16 +5385,16 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetAttrStr(__pyx_v_storage, __pyx_n_s_data, __pyx_t_2) < 0) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":436 + /* "aesara/scan/scan_perform.pyx":436 * if inp.update is not None: * storage.data = output_storage[offset_out].data * offset_out -= 1 # <<<<<<<<<<<<<< - * + * * offset_out = 0 */ __pyx_v_offset_out = (__pyx_v_offset_out - 1); - /* "theano/scan/scan_perform.pyx":434 + /* 
"aesara/scan/scan_perform.pyx":434 * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], * self.fn.input_storage[::-1]): * if inp.update is not None: # <<<<<<<<<<<<<< @@ -5403,7 +5403,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":432 + /* "aesara/scan/scan_perform.pyx":432 * if getattr(fn, 'need_update_inputs', True): * # Update the inputs that have an update function * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], # <<<<<<<<<<<<<< @@ -5413,7 +5413,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":430 + /* "aesara/scan/scan_perform.pyx":430 * # performed. Perform the updates if needed. * offset_out = len(output_storage) - 1 * if getattr(fn, 'need_update_inputs', True): # <<<<<<<<<<<<<< @@ -5422,17 +5422,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":438 + /* "aesara/scan/scan_perform.pyx":438 * offset_out -= 1 - * + * * offset_out = 0 # <<<<<<<<<<<<<< - * + * * # 5.3 Copy over the values for mit_mot outputs */ __pyx_v_offset_out = 0; - /* "theano/scan/scan_perform.pyx":441 - * + /* "aesara/scan/scan_perform.pyx":441 + * * # 5.3 Copy over the values for mit_mot outputs * mitmot_inp_offset = 0 # <<<<<<<<<<<<<< * mitmot_out_idx = 0 @@ -5441,7 +5441,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_int_0); __Pyx_XDECREF_SET(__pyx_v_mitmot_inp_offset, __pyx_int_0); - /* "theano/scan/scan_perform.pyx":442 + /* "aesara/scan/scan_perform.pyx":442 * # 5.3 Copy over the values for mit_mot outputs * mitmot_inp_offset = 0 * mitmot_out_idx = 0 # <<<<<<<<<<<<<< @@ -5451,7 +5451,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_int_0); __Pyx_XDECREF_SET(__pyx_v_mitmot_out_idx, __pyx_int_0); - /* "theano/scan/scan_perform.pyx":443 + /* "aesara/scan/scan_perform.pyx":443 * mitmot_inp_offset = 0 * mitmot_out_idx = 0 * for j in xrange(self.n_mit_mot): # <<<<<<<<<<<<<< @@ -5466,7 +5466,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_20; __pyx_t_5+=1) { __pyx_v_j = __pyx_t_5; - /* "theano/scan/scan_perform.pyx":444 + /* "aesara/scan/scan_perform.pyx":444 * mitmot_out_idx = 0 * for j in xrange(self.n_mit_mot): * for k in self.mit_mot_out_slices[j]: # <<<<<<<<<<<<<< @@ -5522,7 +5522,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_k = __pyx_t_11; - /* "theano/scan/scan_perform.pyx":445 + /* "aesara/scan/scan_perform.pyx":445 * for j in xrange(self.n_mit_mot): * for k in self.mit_mot_out_slices[j]: * if mitmots_preallocated[mitmot_out_idx]: # <<<<<<<<<<<<<< @@ -5534,11 +5534,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_mitmots_preallocated.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_mitmots_preallocated.diminfo[0].strides)) != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":448 + /* "aesara/scan/scan_perform.pyx":448 * # This output tap has been preallocated. 
* inp_idx = (mitmot_inp_offset + * self.tap_array[j].index(k)) # <<<<<<<<<<<<<< - * + * * # Verify whether the input points to the same data as */ __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tap_array); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 448, __pyx_L1_error) @@ -5568,12 +5568,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":447 + /* "aesara/scan/scan_perform.pyx":447 * if mitmots_preallocated[mitmot_out_idx]: * # This output tap has been preallocated. * inp_idx = (mitmot_inp_offset + # <<<<<<<<<<<<<< * self.tap_array[j].index(k)) - * + * */ __pyx_t_10 = PyNumber_Add(__pyx_v_mitmot_inp_offset, __pyx_t_2); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 447, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); @@ -5581,7 +5581,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_inp_idx, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":452 + /* "aesara/scan/scan_perform.pyx":452 * # Verify whether the input points to the same data as * # it did before the execution of the inner function. * old_var = old_mitmot_input_storage[inp_idx] # <<<<<<<<<<<<<< @@ -5593,7 +5593,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_var, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":453 + /* "aesara/scan/scan_perform.pyx":453 * # it did before the execution of the inner function. * old_var = old_mitmot_input_storage[inp_idx] * new_var = input_storage[n_seqs + inp_idx].storage[0] # <<<<<<<<<<<<<< @@ -5617,7 +5617,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_new_var, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":454 + /* "aesara/scan/scan_perform.pyx":454 * old_var = old_mitmot_input_storage[inp_idx] * new_var = input_storage[n_seqs + inp_idx].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -5628,7 +5628,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_15 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":455 + /* "aesara/scan/scan_perform.pyx":455 * new_var = input_storage[n_seqs + inp_idx].storage[0] * if old_var is new_var: * old_data = old_mitmot_input_data[inp_idx] # <<<<<<<<<<<<<< @@ -5640,7 +5640,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_data, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":456 + /* "aesara/scan/scan_perform.pyx":456 * if old_var is new_var: * old_data = old_mitmot_input_data[inp_idx] * if inps_is_tensor[n_seqs + inp_idx]: # <<<<<<<<<<<<<< @@ -5659,7 +5659,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":457 + /* "aesara/scan/scan_perform.pyx":457 * old_data = old_mitmot_input_data[inp_idx] * if inps_is_tensor[n_seqs + inp_idx]: * same_data = (new_var.data == old_data) # <<<<<<<<<<<<<< @@ -5673,7 +5673,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_same_data, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":456 + /* "aesara/scan/scan_perform.pyx":456 * if old_var is new_var: * old_data = old_mitmot_input_data[inp_idx] * if 
inps_is_tensor[n_seqs + inp_idx]: # <<<<<<<<<<<<<< @@ -5683,7 +5683,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L90; } - /* "theano/scan/scan_perform.pyx":459 + /* "aesara/scan/scan_perform.pyx":459 * same_data = (new_var.data == old_data) * else: * same_data = (new_var.gpudata == old_data) # <<<<<<<<<<<<<< @@ -5700,7 +5700,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L90:; - /* "theano/scan/scan_perform.pyx":454 + /* "aesara/scan/scan_perform.pyx":454 * old_var = old_mitmot_input_storage[inp_idx] * new_var = input_storage[n_seqs + inp_idx].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -5710,11 +5710,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L89; } - /* "theano/scan/scan_perform.pyx":461 + /* "aesara/scan/scan_perform.pyx":461 * same_data = (new_var.gpudata == old_data) * else: * same_data = False # <<<<<<<<<<<<<< - * + * * # If the corresponding input storage has been replaced, */ /*else*/ { @@ -5723,7 +5723,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L89:; - /* "theano/scan/scan_perform.pyx":466 + /* "aesara/scan/scan_perform.pyx":466 * # recover the value as usual. Otherwise, the input was * # modified inplace and nothing needs to be done. * if not same_data: # <<<<<<<<<<<<<< @@ -5734,11 +5734,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = ((!__pyx_t_4) != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":468 + /* "aesara/scan/scan_perform.pyx":468 * if not same_data: * outs[j][0][(k + pos[j])] = \ * input_storage[(n_seqs + inp_idx)].storage[0] # <<<<<<<<<<<<<< - * + * * else: */ __pyx_t_10 = __Pyx_PyInt_From_unsigned_int(__pyx_v_n_seqs); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 468, __pyx_L1_error) @@ -5757,12 +5757,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":467 + /* "aesara/scan/scan_perform.pyx":467 * # modified inplace and nothing needs to be done. * if not same_data: * outs[j][0][(k + pos[j])] = \ # <<<<<<<<<<<<<< * input_storage[(n_seqs + inp_idx)].storage[0] - * + * */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_j, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 467, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); @@ -5774,7 +5774,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":466 + /* "aesara/scan/scan_perform.pyx":466 * # recover the value as usual. Otherwise, the input was * # modified inplace and nothing needs to be done. 
* if not same_data: # <<<<<<<<<<<<<< @@ -5783,7 +5783,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":445 + /* "aesara/scan/scan_perform.pyx":445 * for j in xrange(self.n_mit_mot): * for k in self.mit_mot_out_slices[j]: * if mitmots_preallocated[mitmot_out_idx]: # <<<<<<<<<<<<<< @@ -5793,7 +5793,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L88; } - /* "theano/scan/scan_perform.pyx":473 + /* "aesara/scan/scan_perform.pyx":473 * # This output tap has not been preallocated, recover * # its value as usual * outs[j][0][(k + pos[j])] = \ # <<<<<<<<<<<<<< @@ -5802,12 +5802,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ /*else*/ { - /* "theano/scan/scan_perform.pyx":474 + /* "aesara/scan/scan_perform.pyx":474 * # its value as usual * outs[j][0][(k + pos[j])] = \ * output_storage[offset_out].storage[0] # <<<<<<<<<<<<<< * offset_out += 1 - * + * */ __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_output_storage, ((unsigned int)__pyx_v_offset_out), unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 474, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); @@ -5818,7 +5818,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":473 + /* "aesara/scan/scan_perform.pyx":473 * # This output tap has not been preallocated, recover * # its value as usual * outs[j][0][(k + pos[j])] = \ # <<<<<<<<<<<<<< @@ -5835,22 +5835,22 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":475 + /* "aesara/scan/scan_perform.pyx":475 * outs[j][0][(k + pos[j])] = \ * output_storage[offset_out].storage[0] * offset_out += 1 # <<<<<<<<<<<<<< - * + * * mitmot_out_idx += 1 */ __pyx_v_offset_out = (__pyx_v_offset_out + 1); } __pyx_L88:; - /* "theano/scan/scan_perform.pyx":477 + /* "aesara/scan/scan_perform.pyx":477 * offset_out += 1 - * + * * mitmot_out_idx += 1 # <<<<<<<<<<<<<< - * + * * mitmot_inp_offset += len(self.tap_array[j]) */ __pyx_t_2 = __Pyx_PyInt_AddObjC(__pyx_v_mitmot_out_idx, __pyx_int_1, 1, 1, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 477, __pyx_L1_error) @@ -5858,7 +5858,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF_SET(__pyx_v_mitmot_out_idx, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":444 + /* "aesara/scan/scan_perform.pyx":444 * mitmot_out_idx = 0 * for j in xrange(self.n_mit_mot): * for k in self.mit_mot_out_slices[j]: # <<<<<<<<<<<<<< @@ -5868,11 +5868,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":479 + /* "aesara/scan/scan_perform.pyx":479 * mitmot_out_idx += 1 - * + * * mitmot_inp_offset += len(self.tap_array[j]) # <<<<<<<<<<<<<< - * + * * # 5.4 Copy over the values for mit_sot/sit_sot outputs */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tap_array); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 479, __pyx_L1_error) @@ -5891,8 +5891,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_1 = 0; } - /* "theano/scan/scan_perform.pyx":482 - * + /* "aesara/scan/scan_perform.pyx":482 + * * # 5.4 Copy over the 
values for mit_sot/sit_sot outputs * begin = n_mit_mot # <<<<<<<<<<<<<< * end = n_outs @@ -5900,29 +5900,29 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_begin = __pyx_v_n_mit_mot; - /* "theano/scan/scan_perform.pyx":483 + /* "aesara/scan/scan_perform.pyx":483 * # 5.4 Copy over the values for mit_sot/sit_sot outputs * begin = n_mit_mot * end = n_outs # <<<<<<<<<<<<<< * offset_out -= n_mit_mot - * + * */ __pyx_v_end = __pyx_v_n_outs; - /* "theano/scan/scan_perform.pyx":484 + /* "aesara/scan/scan_perform.pyx":484 * begin = n_mit_mot * end = n_outs * offset_out -= n_mit_mot # <<<<<<<<<<<<<< - * + * * for j in range(begin, end): */ __pyx_v_offset_out = (__pyx_v_offset_out - __pyx_v_n_mit_mot); - /* "theano/scan/scan_perform.pyx":486 + /* "aesara/scan/scan_perform.pyx":486 * offset_out -= n_mit_mot - * + * * for j in range(begin, end): # <<<<<<<<<<<<<< - * + * * # Copy the output value to `outs`, if necessary */ __pyx_t_5 = __pyx_v_end; @@ -5930,8 +5930,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = __pyx_v_begin; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_j = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":489 - * + /* "aesara/scan/scan_perform.pyx":489 + * * # Copy the output value to `outs`, if necessary * if store_steps[j] == 1 or vector_outs[j] == 1: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = output_storage[(offset_out+j)].storage[0] @@ -5949,7 +5949,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L95_bool_binop_done:; if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":490 + /* "aesara/scan/scan_perform.pyx":490 * # Copy the output value to `outs`, if necessary * if store_steps[j] == 1 or vector_outs[j] == 1: * outs[j][0][pos[j]] = output_storage[(offset_out+j)].storage[0] # <<<<<<<<<<<<<< @@ -5974,8 +5974,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":489 - * + /* "aesara/scan/scan_perform.pyx":489 + * * # Copy the output value to `outs`, if necessary * if store_steps[j] == 1 or vector_outs[j] == 1: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = output_storage[(offset_out+j)].storage[0] @@ -5984,7 +5984,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L94; } - /* "theano/scan/scan_perform.pyx":494 + /* "aesara/scan/scan_perform.pyx":494 * # Check whether the initialization of the output storage map * # for this output has been reused. * old_var = old_output_storage[offset_out + j] # <<<<<<<<<<<<<< @@ -5998,7 +5998,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_var, __pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":495 + /* "aesara/scan/scan_perform.pyx":495 * # for this output has been reused. 
* old_var = old_output_storage[offset_out + j] * old_data = old_output_data[offset_out + j] # <<<<<<<<<<<<<< @@ -6011,7 +6011,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_data, __pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":496 + /* "aesara/scan/scan_perform.pyx":496 * old_var = old_output_storage[offset_out + j] * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] # <<<<<<<<<<<<<< @@ -6030,7 +6030,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_new_var, __pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":497 + /* "aesara/scan/scan_perform.pyx":497 * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -6041,7 +6041,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_15 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":498 + /* "aesara/scan/scan_perform.pyx":498 * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: * if old_data is None: # <<<<<<<<<<<<<< @@ -6052,7 +6052,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = (__pyx_t_4 != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":499 + /* "aesara/scan/scan_perform.pyx":499 * if old_var is new_var: * if old_data is None: * output_reused = False # <<<<<<<<<<<<<< @@ -6062,7 +6062,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(Py_False); __Pyx_XDECREF_SET(__pyx_v_output_reused, Py_False); - /* "theano/scan/scan_perform.pyx":498 + /* "aesara/scan/scan_perform.pyx":498 * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: * if old_data is None: # <<<<<<<<<<<<<< @@ -6072,7 +6072,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L98; } - /* "theano/scan/scan_perform.pyx":500 + /* "aesara/scan/scan_perform.pyx":500 * if old_data is None: * output_reused = False * elif outs_is_tensor[offset_out + j]: # <<<<<<<<<<<<<< @@ -6083,7 +6083,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_15 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_outs_is_tensor.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_outs_is_tensor.diminfo[0].strides)) != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":501 + /* "aesara/scan/scan_perform.pyx":501 * output_reused = False * elif outs_is_tensor[offset_out + j]: * output_reused = (new_var.data == old_data) # <<<<<<<<<<<<<< @@ -6097,7 +6097,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_output_reused, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":500 + /* "aesara/scan/scan_perform.pyx":500 * if old_data is None: * output_reused = False * elif outs_is_tensor[offset_out + j]: # <<<<<<<<<<<<<< @@ -6107,7 +6107,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L98; } - /* "theano/scan/scan_perform.pyx":503 + /* "aesara/scan/scan_perform.pyx":503 * output_reused = (new_var.data == old_data) * else: * output_reused = (new_var.gpudata == old_data) # <<<<<<<<<<<<<< @@ -6124,7 +6124,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } 
__pyx_L98:; - /* "theano/scan/scan_perform.pyx":497 + /* "aesara/scan/scan_perform.pyx":497 * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -6134,11 +6134,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L97; } - /* "theano/scan/scan_perform.pyx":505 + /* "aesara/scan/scan_perform.pyx":505 * output_reused = (new_var.gpudata == old_data) * else: * output_reused = False # <<<<<<<<<<<<<< - * + * * if not output_reused: */ /*else*/ { @@ -6147,9 +6147,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L97:; - /* "theano/scan/scan_perform.pyx":507 + /* "aesara/scan/scan_perform.pyx":507 * output_reused = False - * + * * if not output_reused: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = \ * output_storage[(offset_out+j)].storage[0] @@ -6158,12 +6158,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((!__pyx_t_15) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":509 + /* "aesara/scan/scan_perform.pyx":509 * if not output_reused: * outs[j][0][pos[j]] = \ * output_storage[(offset_out+j)].storage[0] # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_t_8 = ((unsigned int)(__pyx_v_offset_out + __pyx_v_j)); __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_output_storage, __pyx_t_8, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 509, __pyx_L1_error) @@ -6175,12 +6175,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":508 - * + /* "aesara/scan/scan_perform.pyx":508 + * * if not output_reused: * outs[j][0][pos[j]] = \ # <<<<<<<<<<<<<< * output_storage[(offset_out+j)].storage[0] - * + * */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_j, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); @@ -6191,9 +6191,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":507 + /* "aesara/scan/scan_perform.pyx":507 * output_reused = False - * + * * if not output_reused: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = \ * output_storage[(offset_out+j)].storage[0] @@ -6203,8 +6203,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L94:; } - /* "theano/scan/scan_perform.pyx":513 - * + /* "aesara/scan/scan_perform.pyx":513 + * * # 5.5 Copy over the values for nit_sot outputs * begin = end # <<<<<<<<<<<<<< * end += n_nit_sot @@ -6212,20 +6212,20 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_begin = __pyx_v_end; - /* "theano/scan/scan_perform.pyx":514 + /* "aesara/scan/scan_perform.pyx":514 * # 5.5 Copy over the values for nit_sot outputs * begin = end * end += n_nit_sot # <<<<<<<<<<<<<< * for j in range(begin,end): - * + * */ __pyx_v_end = (__pyx_v_end + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":515 + /* "aesara/scan/scan_perform.pyx":515 * begin = end * end += n_nit_sot * for j in range(begin,end): # <<<<<<<<<<<<<< - * + * * if i == 0: */ __pyx_t_5 = __pyx_v_end; @@ -6233,9 +6233,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 
__pyx_v_begin; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_j = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":517 + /* "aesara/scan/scan_perform.pyx":517 * for j in range(begin,end): - * + * * if i == 0: # <<<<<<<<<<<<<< * jout = j+offset_out * shape = (store_steps[j],) + output_storage[jout].storage[0].shape @@ -6243,8 +6243,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((__pyx_v_i == 0) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":518 - * + /* "aesara/scan/scan_perform.pyx":518 + * * if i == 0: * jout = j+offset_out # <<<<<<<<<<<<<< * shape = (store_steps[j],) + output_storage[jout].storage[0].shape @@ -6252,7 +6252,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_jout = (__pyx_v_j + __pyx_v_offset_out); - /* "theano/scan/scan_perform.pyx":519 + /* "aesara/scan/scan_perform.pyx":519 * if i == 0: * jout = j+offset_out * shape = (store_steps[j],) + output_storage[jout].storage[0].shape # <<<<<<<<<<<<<< @@ -6284,7 +6284,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_shape, __pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":520 + /* "aesara/scan/scan_perform.pyx":520 * jout = j+offset_out * shape = (store_steps[j],) + output_storage[jout].storage[0].shape * dtype = output_storage[jout].storage[0].dtype # <<<<<<<<<<<<<< @@ -6305,7 +6305,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_dtype, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":521 + /* "aesara/scan/scan_perform.pyx":521 * shape = (store_steps[j],) + output_storage[jout].storage[0].shape * dtype = output_storage[jout].storage[0].dtype * if (outs[j][0] is None or # <<<<<<<<<<<<<< @@ -6326,7 +6326,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L104_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":522 + /* "aesara/scan/scan_perform.pyx":522 * dtype = output_storage[jout].storage[0].dtype * if (outs[j][0] is None or * outs[j][0].shape[0] < store_steps[j] or # <<<<<<<<<<<<<< @@ -6357,7 +6357,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L104_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":523 + /* "aesara/scan/scan_perform.pyx":523 * if (outs[j][0] is None or * outs[j][0].shape[0] < store_steps[j] or * outs[j][0].shape[1:] != shape[1:] or # <<<<<<<<<<<<<< @@ -6388,7 +6388,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L104_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":524 + /* "aesara/scan/scan_perform.pyx":524 * outs[j][0].shape[0] < store_steps[j] or * outs[j][0].shape[1:] != shape[1:] or * outs[j][0].dtype != dtype ): # <<<<<<<<<<<<<< @@ -6410,7 +6410,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = __pyx_t_14; __pyx_L104_bool_binop_done:; - /* "theano/scan/scan_perform.pyx":521 + /* "aesara/scan/scan_perform.pyx":521 * shape = (store_steps[j],) + output_storage[jout].storage[0].shape * dtype = output_storage[jout].storage[0].dtype * if (outs[j][0] is None or # <<<<<<<<<<<<<< @@ -6419,7 +6419,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":525 + /* "aesara/scan/scan_perform.pyx":525 * outs[j][0].shape[1:] != shape[1:] or * outs[j][0].dtype != dtype ): * 
outs[j][0] = node.outputs[j].type.value_zeros(shape) # <<<<<<<<<<<<<< @@ -6458,7 +6458,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":521 + /* "aesara/scan/scan_perform.pyx":521 * shape = (store_steps[j],) + output_storage[jout].storage[0].shape * dtype = output_storage[jout].storage[0].dtype * if (outs[j][0] is None or # <<<<<<<<<<<<<< @@ -6468,7 +6468,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L103; } - /* "theano/scan/scan_perform.pyx":526 + /* "aesara/scan/scan_perform.pyx":526 * outs[j][0].dtype != dtype ): * outs[j][0] = node.outputs[j].type.value_zeros(shape) * elif outs[j][0].shape[0] != store_steps[j]: # <<<<<<<<<<<<<< @@ -6495,7 +6495,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":527 + /* "aesara/scan/scan_perform.pyx":527 * outs[j][0] = node.outputs[j].type.value_zeros(shape) * elif outs[j][0].shape[0] != store_steps[j]: * outs[j][0] = outs[j][0][:store_steps[j]] # <<<<<<<<<<<<<< @@ -6516,7 +6516,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":526 + /* "aesara/scan/scan_perform.pyx":526 * outs[j][0].dtype != dtype ): * outs[j][0] = node.outputs[j].type.value_zeros(shape) * elif outs[j][0].shape[0] != store_steps[j]: # <<<<<<<<<<<<<< @@ -6526,7 +6526,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L103:; - /* "theano/scan/scan_perform.pyx":528 + /* "aesara/scan/scan_perform.pyx":528 * elif outs[j][0].shape[0] != store_steps[j]: * outs[j][0] = outs[j][0][:store_steps[j]] * outs[j][0][pos[j]] = output_storage[jout].storage[0] # <<<<<<<<<<<<<< @@ -6550,9 +6550,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":517 + /* "aesara/scan/scan_perform.pyx":517 * for j in range(begin,end): - * + * * if i == 0: # <<<<<<<<<<<<<< * jout = j+offset_out * shape = (store_steps[j],) + output_storage[jout].storage[0].shape @@ -6560,7 +6560,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L102; } - /* "theano/scan/scan_perform.pyx":529 + /* "aesara/scan/scan_perform.pyx":529 * outs[j][0] = outs[j][0][:store_steps[j]] * outs[j][0][pos[j]] = output_storage[jout].storage[0] * elif store_steps[j] == 1 or vector_outs[j] == 1: # <<<<<<<<<<<<<< @@ -6579,7 +6579,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L108_bool_binop_done:; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":530 + /* "aesara/scan/scan_perform.pyx":530 * outs[j][0][pos[j]] = output_storage[jout].storage[0] * elif store_steps[j] == 1 or vector_outs[j] == 1: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] # <<<<<<<<<<<<<< @@ -6604,7 +6604,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":529 + /* "aesara/scan/scan_perform.pyx":529 * outs[j][0] = outs[j][0][:store_steps[j]] * outs[j][0][pos[j]] 
= output_storage[jout].storage[0] * elif store_steps[j] == 1 or vector_outs[j] == 1: # <<<<<<<<<<<<<< @@ -6614,7 +6614,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L102; } - /* "theano/scan/scan_perform.pyx":534 + /* "aesara/scan/scan_perform.pyx":534 * # Check whether the initialization of the output storage map * # for this output has been reused. * old_var = old_output_storage[offset_out + j] # <<<<<<<<<<<<<< @@ -6628,7 +6628,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_var, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":535 + /* "aesara/scan/scan_perform.pyx":535 * # for this output has been reused. * old_var = old_output_storage[offset_out + j] * old_data = old_output_data[offset_out + j] # <<<<<<<<<<<<<< @@ -6641,7 +6641,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old_data, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":536 + /* "aesara/scan/scan_perform.pyx":536 * old_var = old_output_storage[offset_out + j] * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] # <<<<<<<<<<<<<< @@ -6660,7 +6660,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_new_var, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":537 + /* "aesara/scan/scan_perform.pyx":537 * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -6671,7 +6671,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_14 = (__pyx_t_4 != 0); if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":538 + /* "aesara/scan/scan_perform.pyx":538 * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: * if old_data is None: # <<<<<<<<<<<<<< @@ -6682,7 +6682,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_14 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":539 + /* "aesara/scan/scan_perform.pyx":539 * if old_var is new_var: * if old_data is None: * output_reused = False # <<<<<<<<<<<<<< @@ -6692,7 +6692,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_INCREF(Py_False); __Pyx_XDECREF_SET(__pyx_v_output_reused, Py_False); - /* "theano/scan/scan_perform.pyx":538 + /* "aesara/scan/scan_perform.pyx":538 * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: * if old_data is None: # <<<<<<<<<<<<<< @@ -6702,7 +6702,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L111; } - /* "theano/scan/scan_perform.pyx":540 + /* "aesara/scan/scan_perform.pyx":540 * if old_data is None: * output_reused = False * elif outs_is_tensor[offset_out + j]: # <<<<<<<<<<<<<< @@ -6713,7 +6713,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = ((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_outs_is_tensor.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_outs_is_tensor.diminfo[0].strides)) != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":541 + /* "aesara/scan/scan_perform.pyx":541 * output_reused = False * elif outs_is_tensor[offset_out + j]: * output_reused = (new_var.data == old_data) # <<<<<<<<<<<<<< @@ -6727,7 +6727,7 @@ 
static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_output_reused, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":540 + /* "aesara/scan/scan_perform.pyx":540 * if old_data is None: * output_reused = False * elif outs_is_tensor[offset_out + j]: # <<<<<<<<<<<<<< @@ -6737,7 +6737,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L111; } - /* "theano/scan/scan_perform.pyx":543 + /* "aesara/scan/scan_perform.pyx":543 * output_reused = (new_var.data == old_data) * else: * output_reused = (new_var.gpudata == old_data) # <<<<<<<<<<<<<< @@ -6754,7 +6754,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L111:; - /* "theano/scan/scan_perform.pyx":537 + /* "aesara/scan/scan_perform.pyx":537 * old_data = old_output_data[offset_out + j] * new_var = output_storage[offset_out + j].storage[0] * if old_var is new_var: # <<<<<<<<<<<<<< @@ -6764,11 +6764,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L110; } - /* "theano/scan/scan_perform.pyx":545 + /* "aesara/scan/scan_perform.pyx":545 * output_reused = (new_var.gpudata == old_data) * else: * output_reused = False # <<<<<<<<<<<<<< - * + * * if not output_reused: */ /*else*/ { @@ -6777,9 +6777,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L110:; - /* "theano/scan/scan_perform.pyx":547 + /* "aesara/scan/scan_perform.pyx":547 * output_reused = False - * + * * if not output_reused: # <<<<<<<<<<<<<< * try: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] @@ -6788,8 +6788,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_14 = ((!__pyx_t_4) != 0); if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":548 - * + /* "aesara/scan/scan_perform.pyx":548 + * * if not output_reused: * try: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] @@ -6804,7 +6804,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XGOTREF(__pyx_t_21); /*try:*/ { - /* "theano/scan/scan_perform.pyx":549 + /* "aesara/scan/scan_perform.pyx":549 * if not output_reused: * try: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] # <<<<<<<<<<<<<< @@ -6829,8 +6829,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":548 - * + /* "aesara/scan/scan_perform.pyx":548 + * * if not output_reused: * try: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] @@ -6853,7 +6853,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF(__pyx_t_29); __pyx_t_29 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "theano/scan/scan_perform.pyx":550 + /* "aesara/scan/scan_perform.pyx":550 * try: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] * except ValueError as e: # <<<<<<<<<<<<<< @@ -6862,7 +6862,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_t_11 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_ValueError); if (__pyx_t_11) { - __Pyx_AddTraceback("theano.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("aesara.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); if 
(__Pyx_GetException(&__pyx_t_10, &__pyx_t_1, &__pyx_t_2) < 0) __PYX_ERR(0, 550, __pyx_L115_except_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_GOTREF(__pyx_t_1); @@ -6871,7 +6871,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_e = __pyx_t_1; /*try:*/ { - /* "theano/scan/scan_perform.pyx":551 + /* "aesara/scan/scan_perform.pyx":551 * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] * except ValueError as e: * if i == 0: # <<<<<<<<<<<<<< @@ -6881,7 +6881,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_14 = ((__pyx_v_i == 0) != 0); if (unlikely(__pyx_t_14)) { - /* "theano/scan/scan_perform.pyx":552 + /* "aesara/scan/scan_perform.pyx":552 * except ValueError as e: * if i == 0: * raise # <<<<<<<<<<<<<< @@ -6892,10 +6892,10 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_GIVEREF(__pyx_t_1); __Pyx_XGIVEREF(__pyx_t_2); __Pyx_ErrRestoreWithState(__pyx_t_10, __pyx_t_1, __pyx_t_2); - __pyx_t_10 = 0; __pyx_t_1 = 0; __pyx_t_2 = 0; + __pyx_t_10 = 0; __pyx_t_1 = 0; __pyx_t_2 = 0; __PYX_ERR(0, 552, __pyx_L126_error) - /* "theano/scan/scan_perform.pyx":551 + /* "aesara/scan/scan_perform.pyx":551 * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] * except ValueError as e: * if i == 0: # <<<<<<<<<<<<<< @@ -6904,7 +6904,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":553 + /* "aesara/scan/scan_perform.pyx":553 * if i == 0: * raise * raise ValueError( # <<<<<<<<<<<<<< @@ -6918,7 +6918,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __PYX_ERR(0, 553, __pyx_L126_error) } - /* "theano/scan/scan_perform.pyx":550 + /* "aesara/scan/scan_perform.pyx":550 * try: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] * except ValueError as e: # <<<<<<<<<<<<<< @@ -6970,8 +6970,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L115_except_error; __pyx_L115_except_error:; - /* "theano/scan/scan_perform.pyx":548 - * + /* "aesara/scan/scan_perform.pyx":548 + * * if not output_reused: * try: # <<<<<<<<<<<<<< * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] @@ -6985,9 +6985,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L120_try_end:; } - /* "theano/scan/scan_perform.pyx":547 + /* "aesara/scan/scan_perform.pyx":547 * output_reused = False - * + * * if not output_reused: # <<<<<<<<<<<<<< * try: * outs[j][0][pos[j]] = output_storage[j+offset_out].storage[0] @@ -6997,7 +6997,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L102:; } - /* "theano/scan/scan_perform.pyx":562 + /* "aesara/scan/scan_perform.pyx":562 * # 5.6 Copy over the values for outputs corresponding to shared * # variables * begin = end # <<<<<<<<<<<<<< @@ -7006,7 +7006,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_begin = __pyx_v_end; - /* "theano/scan/scan_perform.pyx":563 + /* "aesara/scan/scan_perform.pyx":563 * # variables * begin = end * end += n_shared_outs # <<<<<<<<<<<<<< @@ -7015,7 +7015,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_end = (__pyx_v_end + __pyx_v_n_shared_outs); - /* "theano/scan/scan_perform.pyx":564 + /* "aesara/scan/scan_perform.pyx":564 * begin = end * end += n_shared_outs * for j in range(begin,end): # <<<<<<<<<<<<<< 
@@ -7027,20 +7027,20 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = __pyx_v_begin; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_j = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":565 + /* "aesara/scan/scan_perform.pyx":565 * end += n_shared_outs * for j in range(begin,end): * jout = j +offset_out # <<<<<<<<<<<<<< * outs[j][0] = output_storage[jout].storage[0] - * + * */ __pyx_v_jout = (__pyx_v_j + __pyx_v_offset_out); - /* "theano/scan/scan_perform.pyx":566 + /* "aesara/scan/scan_perform.pyx":566 * for j in range(begin,end): * jout = j +offset_out * outs[j][0] = output_storage[jout].storage[0] # <<<<<<<<<<<<<< - * + * * for idx in range(lenpos): */ __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_output_storage, __pyx_v_jout, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 566, __pyx_L1_error) @@ -7058,9 +7058,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } - /* "theano/scan/scan_perform.pyx":568 + /* "aesara/scan/scan_perform.pyx":568 * outs[j][0] = output_storage[jout].storage[0] - * + * * for idx in range(lenpos): # <<<<<<<<<<<<<< * pos[idx] = (pos[idx]+1)%store_steps[idx] * i = i + 1 @@ -7070,12 +7070,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":569 - * + /* "aesara/scan/scan_perform.pyx":569 + * * for idx in range(lenpos): * pos[idx] = (pos[idx]+1)%store_steps[idx] # <<<<<<<<<<<<<< * i = i + 1 - * + * */ __pyx_t_9 = ((__pyx_v_pos[__pyx_v_idx]) + 1); if (unlikely((__pyx_v_store_steps[__pyx_v_idx]) == 0)) { @@ -7085,18 +7085,18 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py (__pyx_v_pos[__pyx_v_idx]) = __Pyx_mod_long(__pyx_t_9, (__pyx_v_store_steps[__pyx_v_idx])); } - /* "theano/scan/scan_perform.pyx":570 + /* "aesara/scan/scan_perform.pyx":570 * for idx in range(lenpos): * pos[idx] = (pos[idx]+1)%store_steps[idx] * i = i + 1 # <<<<<<<<<<<<<< - * + * * # 6. Check if you need to re-order output buffers */ __pyx_v_i = (__pyx_v_i + 1); } - /* "theano/scan/scan_perform.pyx":573 - * + /* "aesara/scan/scan_perform.pyx":573 + * * # 6. Check if you need to re-order output buffers * begin = n_mit_mot # <<<<<<<<<<<<<< * end = n_outs + n_nit_sot @@ -7104,7 +7104,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_begin = __pyx_v_n_mit_mot; - /* "theano/scan/scan_perform.pyx":574 + /* "aesara/scan/scan_perform.pyx":574 * # 6. 
Check if you need to re-order output buffers * begin = n_mit_mot * end = n_outs + n_nit_sot # <<<<<<<<<<<<<< @@ -7113,7 +7113,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ __pyx_v_end = (__pyx_v_n_outs + __pyx_v_n_nit_sot); - /* "theano/scan/scan_perform.pyx":575 + /* "aesara/scan/scan_perform.pyx":575 * begin = n_mit_mot * end = n_outs + n_nit_sot * for idx in range(begin, end): # <<<<<<<<<<<<<< @@ -7125,12 +7125,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py for (__pyx_t_7 = __pyx_v_begin; __pyx_t_7 < __pyx_t_6; __pyx_t_7+=1) { __pyx_v_idx = __pyx_t_7; - /* "theano/scan/scan_perform.pyx":576 + /* "aesara/scan/scan_perform.pyx":576 * end = n_outs + n_nit_sot * for idx in range(begin, end): * if ( store_steps[idx] < i-mintaps[idx] and # <<<<<<<<<<<<<< * pos[idx] < store_steps[idx] ): - * + * */ __pyx_t_12 = __pyx_v_idx; __pyx_t_4 = (((__pyx_v_store_steps[__pyx_v_idx]) < (__pyx_v_i - (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_mintaps.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_mintaps.diminfo[0].strides)))) != 0); @@ -7140,37 +7140,37 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L140_bool_binop_done; } - /* "theano/scan/scan_perform.pyx":577 + /* "aesara/scan/scan_perform.pyx":577 * for idx in range(begin, end): * if ( store_steps[idx] < i-mintaps[idx] and * pos[idx] < store_steps[idx] ): # <<<<<<<<<<<<<< - * + * * pdx = pos[idx] */ __pyx_t_4 = (((__pyx_v_pos[__pyx_v_idx]) < (__pyx_v_store_steps[__pyx_v_idx])) != 0); __pyx_t_14 = __pyx_t_4; __pyx_L140_bool_binop_done:; - /* "theano/scan/scan_perform.pyx":576 + /* "aesara/scan/scan_perform.pyx":576 * end = n_outs + n_nit_sot * for idx in range(begin, end): * if ( store_steps[idx] < i-mintaps[idx] and # <<<<<<<<<<<<<< * pos[idx] < store_steps[idx] ): - * + * */ if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":579 + /* "aesara/scan/scan_perform.pyx":579 * pos[idx] < store_steps[idx] ): - * + * * pdx = pos[idx] # <<<<<<<<<<<<<< * if pdx >= store_steps[idx]//2 : * # It seems inefficient to copy the bigger part of the */ __pyx_v_pdx = (__pyx_v_pos[__pyx_v_idx]); - /* "theano/scan/scan_perform.pyx":580 - * + /* "aesara/scan/scan_perform.pyx":580 + * * pdx = pos[idx] * if pdx >= store_steps[idx]//2 : # <<<<<<<<<<<<<< * # It seems inefficient to copy the bigger part of the @@ -7179,11 +7179,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_14 = ((__pyx_v_pdx >= __Pyx_div_long((__pyx_v_store_steps[__pyx_v_idx]), 2)) != 0); if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":587 + /* "aesara/scan/scan_perform.pyx":587 * # This way, there will be no information overwritten * # before it is read (as it used to happen). 
* shape = (pdx,)+ outs[idx][0].shape[1:] # <<<<<<<<<<<<<< - * + * * tmp = node.outputs[idx].type.value_zeros(shape) */ __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_pdx); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 587, __pyx_L1_error) @@ -7211,9 +7211,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_shape, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":589 + /* "aesara/scan/scan_perform.pyx":589 * shape = (pdx,)+ outs[idx][0].shape[1:] - * + * * tmp = node.outputs[idx].type.value_zeros(shape) # <<<<<<<<<<<<<< * tmp[:] = outs[idx][0][:pdx] * outs[idx][0][:store_steps[idx]-pdx] = outs[idx][0][pdx:] @@ -7247,8 +7247,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_tmp, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":590 - * + /* "aesara/scan/scan_perform.pyx":590 + * * tmp = node.outputs[idx].type.value_zeros(shape) * tmp[:] = outs[idx][0][:pdx] # <<<<<<<<<<<<<< * outs[idx][0][:store_steps[idx]-pdx] = outs[idx][0][pdx:] @@ -7265,7 +7265,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetSlice(__pyx_v_tmp, __pyx_t_2, 0, 0, NULL, NULL, &__pyx_slice__3, 0, 0, 1) < 0) __PYX_ERR(0, 590, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":591 + /* "aesara/scan/scan_perform.pyx":591 * tmp = node.outputs[idx].type.value_zeros(shape) * tmp[:] = outs[idx][0][:pdx] * outs[idx][0][:store_steps[idx]-pdx] = outs[idx][0][pdx:] # <<<<<<<<<<<<<< @@ -7289,7 +7289,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":592 + /* "aesara/scan/scan_perform.pyx":592 * tmp[:] = outs[idx][0][:pdx] * outs[idx][0][:store_steps[idx]-pdx] = outs[idx][0][pdx:] * outs[idx][0][store_steps[idx]-pdx:] = tmp # <<<<<<<<<<<<<< @@ -7304,8 +7304,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetSlice(__pyx_t_10, __pyx_v_tmp, ((__pyx_v_store_steps[__pyx_v_idx]) - __pyx_v_pdx), 0, NULL, NULL, NULL, 1, 0, 1) < 0) __PYX_ERR(0, 592, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":580 - * + /* "aesara/scan/scan_perform.pyx":580 + * * pdx = pos[idx] * if pdx >= store_steps[idx]//2 : # <<<<<<<<<<<<<< * # It seems inefficient to copy the bigger part of the @@ -7314,7 +7314,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py goto __pyx_L142; } - /* "theano/scan/scan_perform.pyx":594 + /* "aesara/scan/scan_perform.pyx":594 * outs[idx][0][store_steps[idx]-pdx:] = tmp * else: * shape = (store_steps[idx]-pdx,) + outs[idx][0].shape[1:] # <<<<<<<<<<<<<< @@ -7347,7 +7347,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_shape, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":595 + /* "aesara/scan/scan_perform.pyx":595 * else: * shape = (store_steps[idx]-pdx,) + outs[idx][0].shape[1:] * tmp = node.outputs[idx].type.value_zeros(shape) # <<<<<<<<<<<<<< @@ -7383,7 +7383,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_tmp, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":596 + /* "aesara/scan/scan_perform.pyx":596 * shape = (store_steps[idx]-pdx,) + 
outs[idx][0].shape[1:] * tmp = node.outputs[idx].type.value_zeros(shape) * tmp[:] = outs[idx][0][pdx:] # <<<<<<<<<<<<<< @@ -7401,7 +7401,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetSlice(__pyx_v_tmp, __pyx_t_10, 0, 0, NULL, NULL, &__pyx_slice__3, 0, 0, 1) < 0) __PYX_ERR(0, 596, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":597 + /* "aesara/scan/scan_perform.pyx":597 * tmp = node.outputs[idx].type.value_zeros(shape) * tmp[:] = outs[idx][0][pdx:] * outs[idx][0][store_steps[idx]-pdx:] = outs[idx][0][:pdx] # <<<<<<<<<<<<<< @@ -7425,7 +7425,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":598 + /* "aesara/scan/scan_perform.pyx":598 * tmp[:] = outs[idx][0][pdx:] * outs[idx][0][store_steps[idx]-pdx:] = outs[idx][0][:pdx] * outs[idx][0][:store_steps[idx]-pdx] = tmp # <<<<<<<<<<<<<< @@ -7442,22 +7442,22 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __pyx_L142:; - /* "theano/scan/scan_perform.pyx":576 + /* "aesara/scan/scan_perform.pyx":576 * end = n_outs + n_nit_sot * for idx in range(begin, end): * if ( store_steps[idx] < i-mintaps[idx] and # <<<<<<<<<<<<<< * pos[idx] < store_steps[idx] ): - * + * */ goto __pyx_L139; } - /* "theano/scan/scan_perform.pyx":603 + /* "aesara/scan/scan_perform.pyx":603 * # expected to return 0 for all entries for which the gradient is * # not actually computed * elif store_steps[idx] > i - self.mintaps[idx]: # <<<<<<<<<<<<<< * outs[idx][0][i-self.mintaps[idx]:] = 0 - * + * */ __pyx_t_1 = __Pyx_PyInt_From_int((__pyx_v_store_steps[__pyx_v_idx])); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 603, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); @@ -7479,11 +7479,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":604 + /* "aesara/scan/scan_perform.pyx":604 * # not actually computed * elif store_steps[idx] > i - self.mintaps[idx]: * outs[idx][0][i-self.mintaps[idx]:] = 0 # <<<<<<<<<<<<<< - * + * * # This is a fix for a bug introduced by while. If you say */ __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 604, __pyx_L1_error) @@ -7506,22 +7506,22 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":613 + /* "aesara/scan/scan_perform.pyx":613 * # if optimization gets applied compared to when optimization * # do not get applied * if i < n_steps: # <<<<<<<<<<<<<< - * + * * # Cython can not handle negative indices ( because of a */ __pyx_t_14 = ((__pyx_v_i < __pyx_v_n_steps) != 0); if (__pyx_t_14) { - /* "theano/scan/scan_perform.pyx":620 + /* "aesara/scan/scan_perform.pyx":620 * # code faster, so this workaround is better then removing * # the directive. 
* sh0 = outs[idx][0].shape[0] # <<<<<<<<<<<<<< * outs[idx][0] = outs[idx][0][:sh0-(n_steps - i)] - * + * */ __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 620, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); @@ -7537,11 +7537,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_sh0, __pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":621 + /* "aesara/scan/scan_perform.pyx":621 * # the directive. * sh0 = outs[idx][0].shape[0] * outs[idx][0] = outs[idx][0][:sh0-(n_steps - i)] # <<<<<<<<<<<<<< - * + * * # We never reuse the input or output storage of the */ __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_outs, __pyx_v_idx, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 621, __pyx_L1_error) @@ -7564,27 +7564,27 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":613 + /* "aesara/scan/scan_perform.pyx":613 * # if optimization gets applied compared to when optimization * # do not get applied * if i < n_steps: # <<<<<<<<<<<<<< - * + * * # Cython can not handle negative indices ( because of a */ } - /* "theano/scan/scan_perform.pyx":603 + /* "aesara/scan/scan_perform.pyx":603 * # expected to return 0 for all entries for which the gradient is * # not actually computed * elif store_steps[idx] > i - self.mintaps[idx]: # <<<<<<<<<<<<<< * outs[idx][0][i-self.mintaps[idx]:] = 0 - * + * */ } __pyx_L139:; } - /* "theano/scan/scan_perform.pyx":625 + /* "aesara/scan/scan_perform.pyx":625 * # We never reuse the input or output storage of the * # inner function so we clear it. * for i_s in input_storage: # <<<<<<<<<<<<<< @@ -7633,7 +7633,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_i_s, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":626 + /* "aesara/scan/scan_perform.pyx":626 * # inner function so we clear it. * for i_s in input_storage: * i_s.storage[0] = None # <<<<<<<<<<<<<< @@ -7645,7 +7645,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_t_10, 0, Py_None, long, 1, __Pyx_PyInt_From_long, 0, 0, 0) < 0)) __PYX_ERR(0, 626, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":625 + /* "aesara/scan/scan_perform.pyx":625 * # We never reuse the input or output storage of the * # inner function so we clear it. 
* for i_s in input_storage: # <<<<<<<<<<<<<< @@ -7655,12 +7655,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":627 + /* "aesara/scan/scan_perform.pyx":627 * for i_s in input_storage: * i_s.storage[0] = None * for o_s in output_storage: # <<<<<<<<<<<<<< * o_s.storage[0] = None - * + * */ if (likely(PyList_CheckExact(__pyx_v_output_storage)) || PyTuple_CheckExact(__pyx_v_output_storage)) { __pyx_t_2 = __pyx_v_output_storage; __Pyx_INCREF(__pyx_t_2); __pyx_t_16 = 0; @@ -7704,11 +7704,11 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_o_s, __pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":628 + /* "aesara/scan/scan_perform.pyx":628 * i_s.storage[0] = None * for o_s in output_storage: * o_s.storage[0] = None # <<<<<<<<<<<<<< - * + * * t_call = time.time() - t0_call */ __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_o_s, __pyx_n_s_storage); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 628, __pyx_L1_error) @@ -7716,21 +7716,21 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (unlikely(__Pyx_SetItemInt(__pyx_t_10, 0, Py_None, long, 1, __Pyx_PyInt_From_long, 0, 0, 0) < 0)) __PYX_ERR(0, 628, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":627 + /* "aesara/scan/scan_perform.pyx":627 * for i_s in input_storage: * i_s.storage[0] = None * for o_s in output_storage: # <<<<<<<<<<<<<< * o_s.storage[0] = None - * + * */ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":630 + /* "aesara/scan/scan_perform.pyx":630 * o_s.storage[0] = None - * + * * t_call = time.time() - t0_call # <<<<<<<<<<<<<< - * + * * if hasattr(fnct.maker, 'profile'): */ __Pyx_GetModuleGlobalName(__pyx_t_10, __pyx_n_s_time); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 630, __pyx_L1_error) @@ -7759,9 +7759,9 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_t_call = __pyx_t_1; __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":632 + /* "aesara/scan/scan_perform.pyx":632 * t_call = time.time() - t0_call - * + * * if hasattr(fnct.maker, 'profile'): # <<<<<<<<<<<<<< * profile = fnct.maker.profile * if type(profile) is not bool and profile: @@ -7773,8 +7773,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_t_4 = (__pyx_t_14 != 0); if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":633 - * + /* "aesara/scan/scan_perform.pyx":633 + * * if hasattr(fnct.maker, 'profile'): * profile = fnct.maker.profile # <<<<<<<<<<<<<< * if type(profile) is not bool and profile: @@ -7788,7 +7788,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_v_profile = __pyx_t_2; __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":634 + /* "aesara/scan/scan_perform.pyx":634 * if hasattr(fnct.maker, 'profile'): * profile = fnct.maker.profile * if type(profile) is not bool and profile: # <<<<<<<<<<<<<< @@ -7807,7 +7807,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __pyx_L150_bool_binop_done:; if (__pyx_t_4) { - /* "theano/scan/scan_perform.pyx":635 + /* "aesara/scan/scan_perform.pyx":635 * profile = fnct.maker.profile * if type(profile) is not bool and profile: * profile.vm_call_time += t_fn # <<<<<<<<<<<<<< @@ -7822,7 +7822,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if 
(__Pyx_PyObject_SetAttrStr(__pyx_v_profile, __pyx_n_s_vm_call_time, __pyx_t_1) < 0) __PYX_ERR(0, 635, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":636 + /* "aesara/scan/scan_perform.pyx":636 * if type(profile) is not bool and profile: * profile.vm_call_time += t_fn * profile.callcount += 1 # <<<<<<<<<<<<<< @@ -7837,7 +7837,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetAttrStr(__pyx_v_profile, __pyx_n_s_callcount, __pyx_t_2) < 0) __PYX_ERR(0, 636, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":637 + /* "aesara/scan/scan_perform.pyx":637 * profile.vm_call_time += t_fn * profile.callcount += 1 * profile.nbsteps += n_steps # <<<<<<<<<<<<<< @@ -7855,7 +7855,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetAttrStr(__pyx_v_profile, __pyx_n_s_nbsteps, __pyx_t_10) < 0) __PYX_ERR(0, 637, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "theano/scan/scan_perform.pyx":638 + /* "aesara/scan/scan_perform.pyx":638 * profile.callcount += 1 * profile.nbsteps += n_steps * profile.call_time += t_call # <<<<<<<<<<<<<< @@ -7870,22 +7870,22 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py if (__Pyx_PyObject_SetAttrStr(__pyx_v_profile, __pyx_n_s_call_time, __pyx_t_1) < 0) __PYX_ERR(0, 638, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":639 + /* "aesara/scan/scan_perform.pyx":639 * profile.nbsteps += n_steps * profile.call_time += t_call * if hasattr(fn, 'update_profile'): # <<<<<<<<<<<<<< * fn.update_profile(profile) - * + * */ __pyx_t_4 = __Pyx_HasAttr(__pyx_v_fn, __pyx_n_u_update_profile); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(0, 639, __pyx_L1_error) __pyx_t_15 = (__pyx_t_4 != 0); if (__pyx_t_15) { - /* "theano/scan/scan_perform.pyx":640 + /* "aesara/scan/scan_perform.pyx":640 * profile.call_time += t_call * if hasattr(fn, 'update_profile'): * fn.update_profile(profile) # <<<<<<<<<<<<<< - * + * * ### Old Profile Mode */ __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_fn, __pyx_n_s_update_profile); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 640, __pyx_L1_error) @@ -7907,16 +7907,16 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":639 + /* "aesara/scan/scan_perform.pyx":639 * profile.nbsteps += n_steps * profile.call_time += t_call * if hasattr(fn, 'update_profile'): # <<<<<<<<<<<<<< * fn.update_profile(profile) - * + * */ } - /* "theano/scan/scan_perform.pyx":634 + /* "aesara/scan/scan_perform.pyx":634 * if hasattr(fnct.maker, 'profile'): * profile = fnct.maker.profile * if type(profile) is not bool and profile: # <<<<<<<<<<<<<< @@ -7925,17 +7925,17 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ } - /* "theano/scan/scan_perform.pyx":632 + /* "aesara/scan/scan_perform.pyx":632 * t_call = time.time() - t0_call - * + * * if hasattr(fnct.maker, 'profile'): # <<<<<<<<<<<<<< * profile = fnct.maker.profile * if type(profile) is not bool and profile: */ } - /* "theano/scan/scan_perform.pyx":651 - * + /* "aesara/scan/scan_perform.pyx":651 + * * # DEBUG PRINT : * self.t_call = t_call # <<<<<<<<<<<<<< * self.t_fn = t_fn @@ -7943,7 +7943,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ 
if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_t_call, __pyx_v_t_call) < 0) __PYX_ERR(0, 651, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":652 + /* "aesara/scan/scan_perform.pyx":652 * # DEBUG PRINT : * self.t_call = t_call * self.t_fn = t_fn # <<<<<<<<<<<<<< @@ -7951,8 +7951,8 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py */ if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_t_fn, __pyx_v_t_fn) < 0) __PYX_ERR(0, 652, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":70 - * + /* "aesara/scan/scan_perform.pyx":70 + * * @cython.boundscheck(False) * def perform( # <<<<<<<<<<<<<< * unsigned int n_shared_outs, @@ -7989,7 +7989,7 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_vector_outs.rcbuffer->pybuffer); __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_vector_seqs.rcbuffer->pybuffer); __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} - __Pyx_AddTraceback("theano.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("aesara.scan.scan_perform.perform", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; goto __pyx_L2; __pyx_L0:; @@ -8042,12 +8042,12 @@ static PyObject *__pyx_pf_6theano_4scan_12scan_perform_2perform(CYTHON_UNUSED Py return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":734 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":734 * ctypedef npy_cdouble complex_t - * + * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(1, a) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { @@ -8059,11 +8059,11 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":735 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":735 + * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew2(a, b): */ __Pyx_XDECREF(__pyx_r); @@ -8073,12 +8073,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":734 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":734 * ctypedef npy_cdouble complex_t - * + * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(1, a) - * + * */ /* function exit code */ @@ -8092,12 +8092,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":737 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":737 * return PyArray_MultiIterNew(1, a) - * + * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(2, a, b) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { @@ -8109,11 +8109,11 @@ 
static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":738 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":738 + * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): */ __Pyx_XDECREF(__pyx_r); @@ -8123,12 +8123,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":737 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":737 * return PyArray_MultiIterNew(1, a) - * + * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(2, a, b) - * + * */ /* function exit code */ @@ -8142,12 +8142,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":740 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":740 * return PyArray_MultiIterNew(2, a, b) - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(3, a, b, c) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { @@ -8159,11 +8159,11 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":741 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":741 + * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): */ __Pyx_XDECREF(__pyx_r); @@ -8173,12 +8173,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":740 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":740 * return PyArray_MultiIterNew(2, a, b) - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(3, a, b, c) - * + * */ /* function exit code */ @@ -8192,12 +8192,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":743 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":743 * return PyArray_MultiIterNew(3, a, b, c) - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(4, a, b, c, d) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, 
PyObject *__pyx_v_d) { @@ -8209,11 +8209,11 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":744 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":744 + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): */ __Pyx_XDECREF(__pyx_r); @@ -8223,12 +8223,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":743 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":743 * return PyArray_MultiIterNew(3, a, b, c) - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(4, a, b, c, d) - * + * */ /* function exit code */ @@ -8242,12 +8242,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":746 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":746 * return PyArray_MultiIterNew(4, a, b, c, d) - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { @@ -8259,11 +8259,11 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":747 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":747 + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< - * + * * cdef inline tuple PyDataType_SHAPE(dtype d): */ __Pyx_XDECREF(__pyx_r); @@ -8273,12 +8273,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":746 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":746 * return PyArray_MultiIterNew(4, a, b, c, d) - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * */ /* function exit code */ @@ -8292,9 +8292,9 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":749 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":749 * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< * if PyDataType_HASSUBARRAY(d): * 
return d.subarray.shape @@ -8306,8 +8306,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ int __pyx_t_1; __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":750 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":750 + * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< * return d.subarray.shape @@ -8316,7 +8316,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0); if (__pyx_t_1) { - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":751 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":751 * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): * return d.subarray.shape # <<<<<<<<<<<<<< @@ -8328,8 +8328,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":750 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":750 + * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< * return d.subarray.shape @@ -8337,12 +8337,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ */ } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":753 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":753 * return d.subarray.shape * else: * return () # <<<<<<<<<<<<<< - * - * + * + * */ /*else*/ { __Pyx_XDECREF(__pyx_r); @@ -8351,9 +8351,9 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ goto __pyx_L0; } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":749 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":749 * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< * if PyDataType_HASSUBARRAY(d): * return d.subarray.shape @@ -8366,9 +8366,9 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":868 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":868 * int _import_umath() except -1 - * + * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< * Py_INCREF(base) # important to do this before stealing the reference below! 
* PyArray_SetBaseObject(arr, base) @@ -8378,27 +8378,27 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":869 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":869 + * * cdef inline void set_array_base(ndarray arr, object base): * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< * PyArray_SetBaseObject(arr, base) - * + * */ Py_INCREF(__pyx_v_base); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":870 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":870 * cdef inline void set_array_base(ndarray arr, object base): * Py_INCREF(base) # important to do this before stealing the reference below! * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< - * + * * cdef inline object get_array_base(ndarray arr): */ (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base)); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":868 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":868 * int _import_umath() except -1 - * + * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< * Py_INCREF(base) # important to do this before stealing the reference below! * PyArray_SetBaseObject(arr, base) @@ -8408,9 +8408,9 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":872 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":872 * PyArray_SetBaseObject(arr, base) - * + * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< * base = PyArray_BASE(arr) * if base is NULL: @@ -8423,8 +8423,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":873 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":873 + * * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< * if base is NULL: @@ -8432,7 +8432,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ __pyx_v_base = PyArray_BASE(__pyx_v_arr); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":874 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":874 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -8442,18 +8442,18 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_base == NULL) != 0); if (__pyx_t_1) { - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":875 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":875 * base = PyArray_BASE(arr) * if base is NULL: * return None 
# <<<<<<<<<<<<<< * return base - * + * */ __Pyx_XDECREF(__pyx_r); __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":874 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":874 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -8462,11 +8462,11 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":876 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":876 * if base is NULL: * return None * return base # <<<<<<<<<<<<<< - * + * * # Versions of the import_* functions which are more suitable for */ __Pyx_XDECREF(__pyx_r); @@ -8474,9 +8474,9 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = ((PyObject *)__pyx_v_base); goto __pyx_L0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":872 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":872 * PyArray_SetBaseObject(arr, base) - * + * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< * base = PyArray_BASE(arr) * if base is NULL: @@ -8489,7 +8489,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":880 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":880 * # Versions of the import_* functions which are more suitable for * # Cython code. * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -8513,7 +8513,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_array", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8529,7 +8529,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":882 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":882 * cdef inline int import_array() except -1: * try: * __pyx_import_array() # <<<<<<<<<<<<<< @@ -8538,7 +8538,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { */ __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 882, __pyx_L3_error) - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 * # Cython code. 
* cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8552,12 +8552,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":883 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":883 * try: * __pyx_import_array() * except Exception: # <<<<<<<<<<<<<< * raise ImportError("numpy.core.multiarray failed to import") - * + * */ __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); if (__pyx_t_4) { @@ -8567,11 +8567,11 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":884 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":884 * __pyx_import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< - * + * * cdef inline int import_umath() except -1: */ __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 884, __pyx_L5_except_error) @@ -8583,7 +8583,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":881 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -8598,7 +8598,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L8_try_end:; } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":880 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":880 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -8621,9 +8621,9 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":886 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":886 * raise ImportError("numpy.core.multiarray failed to import") - * + * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< * try: * _import_umath() @@ -8645,8 +8645,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_umath", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 + * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< * _import_umath() @@ -8661,7 +8661,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":888 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":888 * cdef inline int import_umath() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -8670,8 +8670,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 888, __pyx_L3_error) - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 + * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< * _import_umath() @@ -8684,12 +8684,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":889 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":889 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< * raise ImportError("numpy.core.umath failed to import") - * + * */ __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); if (__pyx_t_4) { @@ -8699,11 +8699,11 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":890 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":890 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< - * + * * cdef inline int import_ufunc() except -1: */ __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 890, __pyx_L5_except_error) @@ -8715,8 +8715,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":887 + * * cdef inline int import_umath() except -1: 
* try: # <<<<<<<<<<<<<< * _import_umath() @@ -8730,9 +8730,9 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L8_try_end:; } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":886 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":886 * raise ImportError("numpy.core.multiarray failed to import") - * + * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< * try: * _import_umath() @@ -8753,9 +8753,9 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { return __pyx_r; } -/* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 +/* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 * raise ImportError("numpy.core.umath failed to import") - * + * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< * try: * _import_umath() @@ -8777,8 +8777,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_ufunc", 0); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 + * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< * _import_umath() @@ -8793,7 +8793,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":894 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":894 * cdef inline int import_ufunc() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -8802,8 +8802,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 894, __pyx_L3_error) - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 + * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< * _import_umath() @@ -8816,12 +8816,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":895 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":895 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< * raise ImportError("numpy.core.umath failed to import") - * + * */ __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); if (__pyx_t_4) { @@ -8831,11 +8831,11 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":896 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":896 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< - * + * * cdef extern from *: */ __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 
896, __pyx_L5_except_error) @@ -8847,8 +8847,8 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 - * + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":893 + * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< * _import_umath() @@ -8862,9 +8862,9 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L8_try_end:; } - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 * raise ImportError("numpy.core.umath failed to import") - * + * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< * try: * _import_umath() @@ -9054,8 +9054,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_tap_array_len, __pyx_k_tap_array_len, sizeof(__pyx_k_tap_array_len), 0, 0, 1, 1}, {&__pyx_n_s_tdx, __pyx_k_tdx, sizeof(__pyx_k_tdx), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_theano_link_utils, __pyx_k_theano_link_utils, sizeof(__pyx_k_theano_link_utils), 0, 0, 1, 1}, - {&__pyx_n_s_theano_scan_scan_perform, __pyx_k_theano_scan_scan_perform, sizeof(__pyx_k_theano_scan_scan_perform), 0, 0, 1, 1}, + {&__pyx_n_s_aesara_link_utils, __pyx_k_aesara_link_utils, sizeof(__pyx_k_aesara_link_utils), 0, 0, 1, 1}, + {&__pyx_n_s_aesara_scan_scan_perform, __pyx_k_aesara_scan_scan_perform, sizeof(__pyx_k_aesara_scan_scan_perform), 0, 0, 1, 1}, {&__pyx_n_s_thunks, __pyx_k_thunks, sizeof(__pyx_k_thunks), 0, 0, 1, 1}, {&__pyx_n_u_thunks, __pyx_k_thunks, sizeof(__pyx_k_thunks), 0, 1, 0, 1}, {&__pyx_n_s_time, __pyx_k_time, sizeof(__pyx_k_time), 0, 0, 1, 1}, @@ -9094,7 +9094,7 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "theano/scan/scan_perform.pyx":217 + /* "aesara/scan/scan_perform.pyx":217 * n_steps) * elif n_steps == 0: * raise NotImplementedError( # <<<<<<<<<<<<<< @@ -9105,7 +9105,7 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - /* "theano/scan/scan_perform.pyx":250 + /* "aesara/scan/scan_perform.pyx":250 * outs[idx][0] = args[ (1+ n_seqs + idx)] * elif ( outs[idx][0] is not None and * outs[idx][0].shape[1:] == args[(1+ n_seqs + idx)].shape[1:] # <<<<<<<<<<<<<< @@ -9116,7 +9116,7 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_slice__2); __Pyx_GIVEREF(__pyx_slice__2); - /* "theano/scan/scan_perform.pyx":259 + /* "aesara/scan/scan_perform.pyx":259 * idx)][:l] * else: * outs[idx][0][:] = args[(seqs_arg_offset + idx)] # <<<<<<<<<<<<<< @@ -9127,7 +9127,7 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_slice__3); __Pyx_GIVEREF(__pyx_slice__3); - /* "theano/scan/scan_perform.pyx":432 + /* "aesara/scan/scan_perform.pyx":432 * if getattr(fn, 'need_update_inputs', True): * # Update the inputs that have an update function * for inp, storage in zip(self.fn.maker.expanded_inputs[::-1], # <<<<<<<<<<<<<< @@ -9138,7 +9138,7 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_slice__4); __Pyx_GIVEREF(__pyx_slice__4); - /* "theano/scan/scan_perform.pyx":553 + /* 
"aesara/scan/scan_perform.pyx":553 * if i == 0: * raise * raise ValueError( # <<<<<<<<<<<<<< @@ -9149,39 +9149,39 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__5); __Pyx_GIVEREF(__pyx_tuple__5); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":884 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":884 * __pyx_import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< - * + * * cdef inline int import_umath() except -1: */ __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(1, 884, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":890 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":890 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< - * + * * cdef inline int import_ufunc() except -1: */ __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 890, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "theano/scan/scan_perform.pyx":66 - * - * + /* "aesara/scan/scan_perform.pyx":66 + * + * * def get_version(): # <<<<<<<<<<<<<< * return 0.298 - * + * */ __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(0, 0, 0, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_scan_perform_pyx, __pyx_n_s_get_version, 66, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 66, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":70 - * + /* "aesara/scan/scan_perform.pyx":70 + * * @cython.boundscheck(False) * def perform( # <<<<<<<<<<<<<< * unsigned int n_shared_outs, @@ -9259,7 +9259,7 @@ static int __Pyx_modinit_type_import_code(void) { /*--- Type import code ---*/ __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 sizeof(PyTypeObject), #else @@ -9480,14 +9480,14 @@ if (!__Pyx_RefNanny) { #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - if (__pyx_module_is_main_theano__scan__scan_perform) { + if (__pyx_module_is_main_aesara__scan__scan_perform) { if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) } #if PY_MAJOR_VERSION >= 3 { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "theano.scan.scan_perform")) { - if (unlikely(PyDict_SetItemString(modules, "theano.scan.scan_perform", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "aesara.scan.scan_perform")) { + if (unlikely(PyDict_SetItemString(modules, 
"aesara.scan.scan_perform", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) } } #endif @@ -9508,29 +9508,29 @@ if (!__Pyx_RefNanny) { if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* "theano/scan/scan_perform.pyx":51 - * - * + /* "aesara/scan/scan_perform.pyx":51 + * + * * __authors__ = "Razvan Pascanu" "PyMC Developers" # <<<<<<<<<<<<<< * __copyright__ = "(c) 2011, Universite de Montreal" - * + * */ if (PyDict_SetItem(__pyx_d, __pyx_n_s_authors, __pyx_kp_u_Razvan_PascanuPyMC_Developers) < 0) __PYX_ERR(0, 51, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":52 - * + /* "aesara/scan/scan_perform.pyx":52 + * * __authors__ = "Razvan Pascanu" "PyMC Developers" * __copyright__ = "(c) 2011, Universite de Montreal" # <<<<<<<<<<<<<< - * - * + * + * */ if (PyDict_SetItem(__pyx_d, __pyx_n_s_copyright, __pyx_kp_u_c_2011_Universite_de_Montreal) < 0) __PYX_ERR(0, 52, __pyx_L1_error) - /* "theano/scan/scan_perform.pyx":56 - * + /* "aesara/scan/scan_perform.pyx":56 + * * import cython * import numpy # <<<<<<<<<<<<<< - * + * * cimport numpy */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error) @@ -9538,43 +9538,43 @@ if (!__Pyx_RefNanny) { if (PyDict_SetItem(__pyx_d, __pyx_n_s_numpy, __pyx_t_1) < 0) __PYX_ERR(0, 56, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":60 + /* "aesara/scan/scan_perform.pyx":60 * cimport numpy - * + * * import copy # <<<<<<<<<<<<<< * import time - * + * */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_copy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_copy, __pyx_t_1) < 0) __PYX_ERR(0, 60, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":61 - * + /* "aesara/scan/scan_perform.pyx":61 + * * import copy * import time # <<<<<<<<<<<<<< - * - * from theano.link.utils import raise_with_op + * + * from aesara.link.utils import raise_with_op */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_time, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 61, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_time, __pyx_t_1) < 0) __PYX_ERR(0, 61, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "theano/scan/scan_perform.pyx":63 + /* "aesara/scan/scan_perform.pyx":63 * import time - * - * from theano.link.utils import raise_with_op # <<<<<<<<<<<<<< - * - * + * + * from aesara.link.utils import raise_with_op # <<<<<<<<<<<<<< + * + * */ __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_n_s_raise_with_op); __Pyx_GIVEREF(__pyx_n_s_raise_with_op); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_raise_with_op); - __pyx_t_2 = __Pyx_Import(__pyx_n_s_theano_link_utils, __pyx_t_1, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 63, __pyx_L1_error) + __pyx_t_2 = __Pyx_Import(__pyx_n_s_aesara_link_utils, __pyx_t_1, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 63, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_raise_with_op); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) @@ -9583,31 +9583,31 @@ if (!__Pyx_RefNanny) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":66 - * - * + /* "aesara/scan/scan_perform.pyx":66 + * + * * def get_version(): # <<<<<<<<<<<<<< * return 0.298 - * + * */ - __pyx_t_2 = 
PyCFunction_NewEx(&__pyx_mdef_6theano_4scan_12scan_perform_1get_version, NULL, __pyx_n_s_theano_scan_scan_perform); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 66, __pyx_L1_error) + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_6aesara_4scan_12scan_perform_1get_version, NULL, __pyx_n_s_aesara_scan_scan_perform); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 66, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_d, __pyx_n_s_get_version, __pyx_t_2) < 0) __PYX_ERR(0, 66, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":70 - * + /* "aesara/scan/scan_perform.pyx":70 + * * @cython.boundscheck(False) * def perform( # <<<<<<<<<<<<<< * unsigned int n_shared_outs, * unsigned int n_mit_mot_outs, */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_6theano_4scan_12scan_perform_3perform, NULL, __pyx_n_s_theano_scan_scan_perform); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 70, __pyx_L1_error) + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_6aesara_4scan_12scan_perform_3perform, NULL, __pyx_n_s_aesara_scan_scan_perform); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 70, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_d, __pyx_n_s_perform, __pyx_t_2) < 0) __PYX_ERR(0, 70, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "theano/scan/scan_perform.pyx":1 + /* "aesara/scan/scan_perform.pyx":1 * # cython: language_level=3 # <<<<<<<<<<<<<< * """ * This code implements the operations that scan has to carry on when called @@ -9617,9 +9617,9 @@ if (!__Pyx_RefNanny) { if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "../../../../../../apps/anaconda3/envs/theano-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 + /* "../../../../../../apps/anaconda3/envs/aesara-3.7/lib/python3.7/site-packages/numpy/__init__.pxd":892 * raise ImportError("numpy.core.umath failed to import") - * + * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< * try: * _import_umath() @@ -9633,11 +9633,11 @@ if (!__Pyx_RefNanny) { __Pyx_XDECREF(__pyx_t_2); if (__pyx_m) { if (__pyx_d) { - __Pyx_AddTraceback("init theano.scan.scan_perform", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("init aesara.scan.scan_perform", __pyx_clineno, __pyx_lineno, __pyx_filename); } Py_CLEAR(__pyx_m); } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init theano.scan.scan_perform"); + PyErr_SetString(PyExc_ImportError, "init aesara.scan.scan_perform"); } __pyx_L0:; __Pyx_RefNannyFinishContext(); @@ -11759,8 +11759,8 @@ static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED llx = lla + llb; return PyLong_FromLongLong(llx); #endif - - + + } #endif if (PyFloat_CheckExact(op1)) { diff --git a/theano/scan/checkpoints.py b/aesara/scan/checkpoints.py similarity index 90% rename from theano/scan/checkpoints.py rename to aesara/scan/checkpoints.py index 5198d74160..ea18ccafc2 100644 --- a/theano/scan/checkpoints.py +++ b/aesara/scan/checkpoints.py @@ -1,8 +1,8 @@ -import theano.tensor.basic as tt -from theano.scan.basic import scan -from theano.tensor.basic import Join -from theano.tensor.math import ceil, eq -from theano.tensor.subtensor import set_subtensor +import aesara.tensor.basic as tt +from aesara.scan.basic import scan +from aesara.tensor.basic import Join +from aesara.tensor.math import ceil, eq +from aesara.tensor.subtensor import set_subtensor def scan_checkpoints( @@ -17,7 +17,7 @@ def scan_checkpoints( ): """Scan function that uses less 
memory, but is more restrictive. - In :func:`~theano.scan`, if you compute the gradient of the output + In :func:`~aesara.scan`, if you compute the gradient of the output with respect to the input, you will have to store the intermediate results at each time step, which can be prohibitively huge. This function allows to do ``save_every_N`` steps of forward computations @@ -41,16 +41,16 @@ def scan_checkpoints( ---------- fn ``fn`` is a function that describes the operations involved in one - step of ``scan``. See the documentation of :func:`~theano.scan` + step of ``scan``. See the documentation of :func:`~aesara.scan` for more information. sequences - ``sequences`` is the list of Theano variables or dictionaries + ``sequences`` is the list of Aesara variables or dictionaries describing the sequences ``scan`` has to iterate over. All sequences must be the same length in this version of ``scan``. outputs_info - ``outputs_info`` is the list of Theano variables or dictionaries + ``outputs_info`` is the list of Aesara variables or dictionaries describing the initial state of the outputs computed recurrently. @@ -62,7 +62,7 @@ def scan_checkpoints( n_steps ``n_steps`` is the number of steps to iterate given as an int - or Theano scalar (> 0). If any of the input sequences do not have + or Aesara scalar (> 0). If any of the input sequences do not have enough elements, scan will raise an error. If n_steps is not provided, ``scan`` will figure out the amount of steps it should run given its input sequences. @@ -82,7 +82,7 @@ def scan_checkpoints( Returns ------- tuple - Tuple of the form ``(outputs, updates)`` as in :func:`~theano.scan`, but + Tuple of the form ``(outputs, updates)`` as in :func:`~aesara.scan`, but with a small change: It only contain the output at each ``save_every_N`` step. The time steps that are not returned by this function will be recomputed during the gradient computation @@ -90,7 +90,7 @@ def scan_checkpoints( See Also -------- - :func:`~theano.scan`: Looping in Theano. + :func:`~aesara.scan`: Looping in Aesara. """ # Standardize the format of input arguments diff --git a/theano/scan/op.py b/aesara/scan/op.py similarity index 98% rename from theano/scan/op.py rename to aesara/scan/op.py index 4e173c2904..44c3045e57 100644 --- a/theano/scan/op.py +++ b/aesara/scan/op.py @@ -20,8 +20,8 @@ inputs and the outputs of the inner function which could lead to invalid results. - In make_thunk(), again, the borrow flag must be set to True for the outputs. - This will make Theano consider the output storages as persistent and make - Theano provide them as pre-allocated storage to the ops that compute the + This will make Aesara consider the output storages as persistent and make + Aesara provide them as pre-allocated storage to the ops that compute the outputs of the inner function instead of letting these ops allocate their own output storage. 
- The ops that produce the outputs of the inner function must be prevented @@ -52,16 +52,16 @@ import numpy as np -import theano -from theano import tensor as tt -from theano.compile.builders import infer_shape -from theano.compile.function import function -from theano.compile.io import In, Out -from theano.compile.mode import AddFeatureOptimizer, get_mode -from theano.compile.profiling import ScanProfileStats, register_profiler_printer -from theano.configdefaults import config -from theano.gradient import DisconnectedType, NullType, Rop, grad, grad_undefined -from theano.graph.basic import ( +import aesara +from aesara import tensor as tt +from aesara.compile.builders import infer_shape +from aesara.compile.function import function +from aesara.compile.io import In, Out +from aesara.compile.mode import AddFeatureOptimizer, get_mode +from aesara.compile.profiling import ScanProfileStats, register_profiler_printer +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType, NullType, Rop, grad, grad_undefined +from aesara.graph.basic import ( Apply, Constant, Variable, @@ -70,18 +70,18 @@ graph_inputs, io_connection_pattern, ) -from theano.graph.fg import MissingInputError -from theano.graph.op import Op, ops_with_inner_function -from theano.graph.toolbox import NoOutputFromInplace -from theano.link.c.basic import CLinker -from theano.link.c.exceptions import MissingGXX -from theano.link.utils import raise_with_op -from theano.scan.utils import Validator, forced_replace, hash_listsDictsTuples, safe_new -from theano.tensor.basic import as_tensor_variable -from theano.tensor.math import minimum -from theano.tensor.shape import Shape_i -from theano.tensor.type import TensorType, integer_dtypes -from theano.tensor.var import TensorVariable +from aesara.graph.fg import MissingInputError +from aesara.graph.op import Op, ops_with_inner_function +from aesara.graph.toolbox import NoOutputFromInplace +from aesara.link.c.basic import CLinker +from aesara.link.c.exceptions import MissingGXX +from aesara.link.utils import raise_with_op +from aesara.scan.utils import Validator, forced_replace, hash_listsDictsTuples, safe_new +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.math import minimum +from aesara.tensor.shape import Shape_i +from aesara.tensor.type import TensorType, integer_dtypes +from aesara.tensor.var import TensorVariable __docformat__ = "restructedtext en" @@ -95,7 +95,7 @@ __copyright__ = "(c) 2010, Universite de Montreal" # Logging function for sending warning or info -_logger = logging.getLogger("theano.scan.op") +_logger = logging.getLogger("aesara.scan.op") class Scan(Op): @@ -112,12 +112,12 @@ class Scan(Op): of different types of arguments, name, mode, if it should run on GPU or not, etc.). typeConstructor - Function that constructs an equivalent to Theano TensorType. + Function that constructs an equivalent to Aesara TensorType. Notes ----- ``typeConstructor`` had been added to refactor how - Theano deals with the GPU. If it runs on the GPU, scan needs + Aesara deals with the GPU. If it runs on the GPU, scan needs to construct certain outputs (those who reside in the GPU memory) as the GPU-specific type. 
However we can not import gpu code in this file (as it is in sandbox, and not available @@ -264,7 +264,7 @@ def validate_inner_graph(self): # If scan has the flag 'gpua' set to false (meaning that is shouldn't # use the gpuarray gpu backend ), ensure that is has no input and no # output with type GpuArrayType - from theano.gpuarray import GpuArrayType + from aesara.gpuarray import GpuArrayType if not self.info.get("gpua", False): for inp in self.inputs: @@ -426,7 +426,7 @@ def check_broadcast(v1, v2): "axis %d in `output_info`. This can happen if one of the " "dimension is fixed to 1 in the input, while it is still " "variable in the output, or vice-verca. You have to make " - "them consistent, e.g. using theano.tensor." + "them consistent, e.g. using aesara.tensor." "{patternbroadcast,unbroadcast,addbroadcast}." ) size = min(len(v1.broadcastable), len(v2.broadcastable)) @@ -1514,7 +1514,7 @@ def perform(self, node, inputs, output_storage, params=None): pdx = offset + self.n_shared_outs cond = inner_output_storage[pdx].storage[0] == 0 - # 5.2. By calling fn() directly instead of calling the theano + # 5.2. By calling fn() directly instead of calling the aesara # function, it is possible that the updates have not been # performed. Perform the updates if needed. offset_out = len(inner_output_storage) - 1 @@ -1617,7 +1617,7 @@ def perform(self, node, inputs, output_storage, params=None): "This may be caused by a pushout optimization." " Try adding " "'optimizer_excluding=scanOp_pushout_output' " - "to your Theano flags." + "to your Aesara flags." ) raise ne from e @@ -1892,7 +1892,7 @@ def connection_pattern(self, node): # We cache the result of this function because, with a previous # implementation that repeatedly called grad, there were cases - # where calls to theano.grad() took as much as 4h for functions + # where calls to aesara.grad() took as much as 4h for functions # containing many nested scans. if hasattr(node.tag, "connection_pattern"): return node.tag.connection_pattern @@ -2283,7 +2283,7 @@ def compute_all_gradients(known_grads): if not isinstance(dC_douts[outer_oidx].type, DisconnectedType): dtypes.append(dC_douts[outer_oidx].dtype) if dtypes: - new_dtype = theano.scalar.upcast(*dtypes) + new_dtype = aesara.scalar.upcast(*dtypes) else: new_dtype = config.floatX dC_dXt = safe_new(Xt, dtype=new_dtype) @@ -3076,7 +3076,7 @@ def R_op(self, inputs, eval_points): return final_outs -# Since Scan is an op that contains a Theano compiled function, it is +# Since Scan is an op that contains an Aesara compiled function, it is # useful to let DebugMode know about it. ops_with_inner_function[Scan] = "fn" @@ -3108,11 +3108,11 @@ def profile_printer( if isinstance(node.op, Scan) and not node.op.fn.profile: print( " One scan node do not have its inner profile enabled. " - "If you enable Theano profiler with " - "'theano.function(..., profile=True)', you must manually" + "If you enable Aesara profiler with " + "'aesara.function(..., profile=True)', you must manually" " enable the profiling for each scan too: " - "'theano.scan(...,profile=True)'." - " Or use Theano flag 'profile=True'.", + "'aesara.scan(...,profile=True)'." 
+ " Or use Aesara flag 'profile=True'.", file=file, ) elif isinstance(node.op, Scan) and node.op.fn.profile: diff --git a/theano/scan/opt.py b/aesara/scan/opt.py similarity index 98% rename from theano/scan/opt.py rename to aesara/scan/opt.py index 3c30ea1e41..9a2ba00054 100644 --- a/theano/scan/opt.py +++ b/aesara/scan/opt.py @@ -57,13 +57,13 @@ import numpy as np -import theano -from theano import scalar as ts -from theano import tensor as tt -from theano.compile import optdb -from theano.compile.function.types import deep_copy_op -from theano.configdefaults import config -from theano.graph.basic import ( +import aesara +from aesara import scalar as ts +from aesara import tensor as tt +from aesara.compile import optdb +from aesara.compile.function.types import deep_copy_op +from aesara.configdefaults import config +from aesara.graph.basic import ( Constant, Variable, clone_replace, @@ -72,13 +72,13 @@ io_toposort, is_in_ancestors, ) -from theano.graph.destroyhandler import DestroyHandler -from theano.graph.fg import InconsistencyError -from theano.graph.opt import GlobalOptimizer, in2out, local_optimizer -from theano.graph.optdb import EquilibriumDB, SequenceDB -from theano.graph.toolbox import ReplaceValidate -from theano.scan.op import Scan -from theano.scan.utils import ( +from aesara.graph.destroyhandler import DestroyHandler +from aesara.graph.fg import InconsistencyError +from aesara.graph.opt import GlobalOptimizer, in2out, local_optimizer +from aesara.graph.optdb import EquilibriumDB, SequenceDB +from aesara.graph.toolbox import ReplaceValidate +from aesara.scan.op import Scan +from aesara.scan.utils import ( compress_outs, expand_empty, reconstruct_graph, @@ -86,20 +86,20 @@ scan_args, scan_can_remove_outs, ) -from theano.tensor import basic_opt, math_opt -from theano.tensor.basic import Alloc, AllocEmpty, get_scalar_constant_value -from theano.tensor.elemwise import DimShuffle, Elemwise -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import Dot, dot, maximum, minimum -from theano.tensor.shape import shape -from theano.tensor.subtensor import ( +from aesara.tensor import basic_opt, math_opt +from aesara.tensor.basic import Alloc, AllocEmpty, get_scalar_constant_value +from aesara.tensor.elemwise import DimShuffle, Elemwise +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import Dot, dot, maximum, minimum +from aesara.tensor.shape import shape +from aesara.tensor.subtensor import ( IncSubtensor, Subtensor, get_canonical_form_slice, get_idx_list, set_subtensor, ) -from theano.tensor.var import TensorConstant +from aesara.tensor.var import TensorConstant __docformat__ = "restructedtext en" @@ -115,7 +115,7 @@ # Logging function for sending warning or info -_logger = logging.getLogger("theano.scan.opt") +_logger = logging.getLogger("aesara.scan.opt") list_opt_slice = [ math_opt.local_abs_merge, @@ -318,8 +318,8 @@ def add_to_replace(y): # we can do this because the assumption is that a # viewOp or deepCopyOp will be just at the end of the # function and not somewhere in the middle .. - not isinstance(nd.op, theano.compile.ViewOp) - and not isinstance(nd.op, theano.compile.DeepCopyOp) + not isinstance(nd.op, aesara.compile.ViewOp) + and not isinstance(nd.op, aesara.compile.DeepCopyOp) ): # We have a candidate node to removable @@ -341,7 +341,7 @@ def add_to_replace(y): "operations`. The optimization tries " "to move some computation fron scan " "which is not allowed to move. 
Report " - "this on theano-users list" + "this on aesara-users list" ), x, ) @@ -555,7 +555,7 @@ def add_to_replace(y): "operations`. The optimization tries " "to move some computation fron scan " "which is not allowed to move. Report " - "this on theano-users list" + "this on aesara-users list" ), x, ) @@ -1071,7 +1071,7 @@ def apply(self, fgraph): # gpuarray might be imported but not its GpuAlloc and # GpuAllopEmpty ops. try: - alloc_ops += (theano.gpuarray.GpuAlloc, theano.gpuarray.GpuAllocEmpty) + alloc_ops += (aesara.gpuarray.GpuAlloc, aesara.gpuarray.GpuAllocEmpty) except Exception: pass diff --git a/theano/scan/scan_perform.pyx b/aesara/scan/scan_perform.pyx similarity index 99% rename from theano/scan/scan_perform.pyx rename to aesara/scan/scan_perform.pyx index f70e18614e..44177e08c6 100644 --- a/theano/scan/scan_perform.pyx +++ b/aesara/scan/scan_perform.pyx @@ -60,7 +60,7 @@ cimport numpy import copy import time -from theano.link.utils import raise_with_op +from aesara.link.utils import raise_with_op def get_version(): @@ -155,7 +155,7 @@ def perform( that code) fnct: python object Only used to attach some timings for the profile mode ( can be - skiped if we don't care about Theano's profile mode) + skiped if we don't care about Aesara's profile mode) destroy_map Array of boolean saying if an output is computed inplace args: list of ndarrays (and random states) @@ -423,7 +423,7 @@ def perform( pdx = offset + n_shared_outs cond = output_storage[pdx].storage[0] == 0 - # 5.2. By calling fn() directly instead of calling the theano + # 5.2. By calling fn() directly instead of calling the aesara # function, it is possible that the updates have not been # performed. Perform the updates if needed. offset_out = len(output_storage) - 1 @@ -555,7 +555,7 @@ def perform( "This may be caused by a pushout optimization." 
" Try adding " "'optimizer_excluding=scanOp_pushout_output' " - "to your Theano flags.") + "to your Aesara flags.") # 5.6 Copy over the values for outputs corresponding to shared # variables diff --git a/theano/scan/scan_perform_ext.py b/aesara/scan/scan_perform_ext.py similarity index 92% rename from theano/scan/scan_perform_ext.py rename to aesara/scan/scan_perform_ext.py index 3c9f13b0d9..62e1465157 100644 --- a/theano/scan/scan_perform_ext.py +++ b/aesara/scan/scan_perform_ext.py @@ -10,16 +10,16 @@ import sys from importlib import reload -import theano -from theano.compile.compilelock import lock_ctx -from theano.configdefaults import config -from theano.link.c import cmodule +import aesara +from aesara.compile.compilelock import lock_ctx +from aesara.configdefaults import config +from aesara.link.c import cmodule if not config.cxx: raise ImportError("No C compiler; cannot compile Cython-generated code") -_logger = logging.getLogger("theano.scan.scan_perform") +_logger = logging.getLogger("aesara.scan.scan_perform") version = 0.298 # must match constant returned in function get_version() @@ -70,7 +70,7 @@ def try_reload(): except ImportError: _logger.info("Compiling C code for scan") - cfile = os.path.join(theano.__path__[0], "scan", "c_code", "scan_perform.c") + cfile = os.path.join(aesara.__path__[0], "scan", "c_code", "scan_perform.c") if not os.path.exists(cfile): raise ImportError( diff --git a/theano/scan/utils.py b/aesara/scan/utils.py similarity index 96% rename from theano/scan/utils.py rename to aesara/scan/utils.py index 323a45def1..e27296fd37 100644 --- a/theano/scan/utils.py +++ b/aesara/scan/utils.py @@ -19,27 +19,27 @@ import numpy as np -from theano import scalar as ts -from theano import tensor as tt -from theano.configdefaults import config -from theano.graph.basic import ( +from aesara import scalar as ts +from aesara import tensor as tt +from aesara.configdefaults import config +from aesara.graph.basic import ( Constant, Variable, clone_replace, equal_computations, graph_inputs, ) -from theano.graph.fg import FunctionGraph -from theano.graph.op import get_test_value -from theano.graph.opt import TopoOptimizer, local_optimizer -from theano.graph.utils import TestValueError -from theano.tensor.basic import AllocEmpty, get_scalar_constant_value -from theano.tensor.subtensor import set_subtensor -from theano.tensor.var import TensorConstant +from aesara.graph.fg import FunctionGraph +from aesara.graph.op import get_test_value +from aesara.graph.opt import TopoOptimizer, local_optimizer +from aesara.graph.utils import TestValueError +from aesara.tensor.basic import AllocEmpty, get_scalar_constant_value +from aesara.tensor.subtensor import set_subtensor +from aesara.tensor.var import TensorConstant # Logging function for sending warning or info -_logger = logging.getLogger("theano.scan.utils") +_logger = logging.getLogger("aesara.scan.utils") def safe_new(x, tag="", dtype=None): @@ -151,9 +151,9 @@ def traverse(out, x, x_copy, d, visited=None): if out in visited: return d visited.add(out) - from theano.gpuarray import pygpu_activated - from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu - from theano.gpuarray.type import GpuArrayType + from aesara.gpuarray import pygpu_activated + from aesara.gpuarray.basic_ops import GpuFromHost, host_from_gpu + from aesara.gpuarray.type import GpuArrayType if out == x: assert isinstance(x.type, GpuArrayType) @@ -202,9 +202,9 @@ def map_variables(replacer, graphs, additional_inputs=None): tag = "replaceme" - a = 
theano.tensor.type.scalar("a") - b = theano.tensor.type.scalar("b") - c = theano.tensor.type.scalar("c") + a = aesara.tensor.type.scalar("a") + b = aesara.tensor.type.scalar("b") + c = aesara.tensor.type.scalar("c") ab = a + b ab.tag.replacement = a * b @@ -255,8 +255,8 @@ def local_transform(fgraph, node): return False # importing Scan into module scope would be circular - from theano.compile.builders import OpFromGraph - from theano.scan.op import Scan + from aesara.compile.builders import OpFromGraph + from aesara.scan.op import Scan if isinstance(node.op, (Scan, OpFromGraph)): # recurse on the inner graph @@ -325,7 +325,7 @@ def _map_variables_inner( from itertools import chain - from theano.scan import utils + from aesara.scan import utils def inner_replacer(graph): new_graph = replacer(graph) @@ -358,7 +358,7 @@ def inner_replacer(graph): for outer_input in foreign_inputs: if getattr(outer_input, "update", False): - # when theano.scan() constructs a scan node, it detects + # when aesara.scan() constructs a scan node, it detects # shared variables with updates and returns these updates # to the user. we need to do the same thing for every new # use of such a variable that is introduced. it's hard to @@ -447,8 +447,8 @@ def _filter(x): """ Ensure `x` is made only of allowed data types. - Return True iff `x` is made only of lists, tuples, dictionaries, Theano - variables or `theano.scan.utils.until` objects. + Return True iff `x` is made only of lists, tuples, dictionaries, Aesara + variables or `aesara.scan.utils.until` objects. """ # Is `x` a container we can iterate on? @@ -465,11 +465,11 @@ def _filter(x): if not _filter(ls): raise ValueError( "The return value of your scan lambda expression may only be " - "made of lists, tuples, or dictionaries containing Theano " - "variables (or `theano.scan.utils.until` objects for " + "made of lists, tuples, or dictionaries containing Aesara " + "variables (or `aesara.scan.utils.until` objects for " "conditions). In particular if you need to use constant " "values, you can use `tensor.constant` to turn them into " - "Theano variables." + "Aesara variables." ) if is_outputs(ls): @@ -1109,9 +1109,9 @@ def forced_replace(out, x, y): Parameters ---------- - out : Theano Variable - x : Theano Variable - y : Theano Variable + out : Aesara Variable + x : Aesara Variable + y : Aesara Variable Examples -------- diff --git a/theano/scan/views.py b/aesara/scan/views.py similarity index 98% rename from theano/scan/views.py rename to aesara/scan/views.py index 2c58ca40ab..71ae5b610c 100644 --- a/theano/scan/views.py +++ b/aesara/scan/views.py @@ -8,10 +8,10 @@ import logging -from theano.scan import scan +from aesara.scan import scan -_logger = logging.getLogger("theano.scan.views") +_logger = logging.getLogger("aesara.scan.views") def map( diff --git a/theano/sparse/__init__.py b/aesara/sparse/__init__.py similarity index 72% rename from theano/sparse/__init__.py rename to aesara/sparse/__init__.py index 4ac05e7fa3..89c4b2ceb8 100644 --- a/theano/sparse/__init__.py +++ b/aesara/sparse/__init__.py @@ -9,13 +9,13 @@ enable_sparse = False warn("SciPy can't be imported. 
Sparse matrix support is disabled.") -from theano.sparse.type import SparseType, _is_sparse +from aesara.sparse.type import SparseType, _is_sparse if enable_sparse: - from theano.sparse import opt, sharedvar - from theano.sparse.basic import * - from theano.sparse.sharedvar import sparse_constructor as shared + from aesara.sparse import opt, sharedvar + from aesara.sparse.basic import * + from aesara.sparse.sharedvar import sparse_constructor as shared def sparse_grad(var): """This function return a new variable whose gradient will be @@ -26,7 +26,7 @@ def sparse_grad(var): .. versionadded:: 0.6rc4 """ - from theano.tensor.subtensor import AdvancedSubtensor1 + from aesara.tensor.subtensor import AdvancedSubtensor1 assert isinstance(var.owner.op, AdvancedSubtensor1) diff --git a/theano/sparse/basic.py b/aesara/sparse/basic.py similarity index 98% rename from theano/sparse/basic.py rename to aesara/sparse/basic.py index 1e1687b88a..14acede19b 100644 --- a/theano/sparse/basic.py +++ b/aesara/sparse/basic.py @@ -13,23 +13,23 @@ import scipy.sparse from numpy.lib.stride_tricks import as_strided -import theano -from theano import scalar as ts -from theano.configdefaults import config -from theano.gradient import DisconnectedType, grad_not_implemented, grad_undefined -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import COp, Op -from theano.misc.safe_asarray import _asarray -from theano.sparse.type import SparseType, _is_sparse -from theano.sparse.utils import hash_from_sparse -from theano.tensor import basic as tt -from theano.tensor.basic import Split -from theano.tensor.math import add as tt_add -from theano.tensor.math import arcsin, arcsinh, arctan, arctanh, ceil, conj, deg2rad -from theano.tensor.math import dot as tt_dot -from theano.tensor.math import exp, expm1, floor, log, log1p, maximum, minimum -from theano.tensor.math import pow as tt_pow -from theano.tensor.math import ( +import aesara +from aesara import scalar as ts +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType, grad_not_implemented, grad_undefined +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import COp, Op +from aesara.misc.safe_asarray import _asarray +from aesara.sparse.type import SparseType, _is_sparse +from aesara.sparse.utils import hash_from_sparse +from aesara.tensor import basic as tt +from aesara.tensor.basic import Split +from aesara.tensor.math import add as tt_add +from aesara.tensor.math import arcsin, arcsinh, arctan, arctanh, ceil, conj, deg2rad +from aesara.tensor.math import dot as tt_dot +from aesara.tensor.math import exp, expm1, floor, log, log1p, maximum, minimum +from aesara.tensor.math import pow as tt_pow +from aesara.tensor.math import ( rad2deg, round_half_to_even, sgn, @@ -41,11 +41,11 @@ tanh, trunc, ) -from theano.tensor.shape import shape -from theano.tensor.type import TensorType -from theano.tensor.type import continuous_dtypes as tensor_continuous_dtypes -from theano.tensor.type import discrete_dtypes as tensor_discrete_dtypes -from theano.tensor.type import iscalar, ivector, scalar, tensor, vector +from aesara.tensor.shape import shape +from aesara.tensor.type import TensorType +from aesara.tensor.type import continuous_dtypes as tensor_continuous_dtypes +from aesara.tensor.type import discrete_dtypes as tensor_discrete_dtypes +from aesara.tensor.type import iscalar, ivector, scalar, tensor, vector sparse_formats = ["csc", "csr"] @@ -384,7 +384,7 @@ def __hash__(self): (a, b) = self 
return hash(type(self)) ^ hash(a) ^ hash(type(b)) - def theano_hash(self): + def aesara_hash(self): (_, d) = self return hash_from_sparse(d) @@ -975,7 +975,7 @@ def infer_shape(self, fgraph, node, shapes): Returns ------- -theano.tensor.matrix +aesara.tensor.matrix A dense matrix, the same as `x`. Notes @@ -1001,7 +1001,7 @@ def make_node(self, x): x = tt.as_tensor_variable(x) if x.ndim > 2: raise TypeError( - "Theano does not have sparse tensor types with more " + "Aesara does not have sparse tensor types with more " f"than 2 dimensions, but {x}.ndim = {x.ndim}" ) elif x.ndim == 1: @@ -1210,7 +1210,7 @@ def grad(self, inputs, g_outputs): Returns ------- -theano.tensor.vector +aesara.tensor.vector The corresponding elements in `x`. """ @@ -1274,11 +1274,11 @@ def make_node(self, x, index): assert len(index) in [1, 2] input_op = [x] - generic_None = Constant(theano.graph.type.generic, None) + generic_None = Constant(aesara.graph.type.generic, None) for ind in index: if isinstance(ind, slice): - # in case of slice is written in theano variable + # in case of slice is written in aesara variable start = ind.start stop = ind.stop step = ind.step @@ -1339,7 +1339,7 @@ def make_node(self, x, index): isinstance(ind, Variable) and getattr(ind, "ndim", -1) == 0 ) or np.isscalar(ind): raise NotImplementedError( - "Theano has no sparse vector" + "Aesara has no sparse vector" + "Use X[a:b, c:d], X[a:b, c:c+1] or X[a:b] instead." ) else: @@ -1389,7 +1389,7 @@ def perform(self, node, inputs, outputs): The above indexing methods are not supported because the return value would be a sparse matrix rather than a sparse vector, which is a deviation from numpy indexing rule. This decision is made largely -to preserve consistency between numpy and theano. This may be revised +to preserve consistency between numpy and aesara. This may be revised when sparse vectors are supported. The grad is not implemented for this op. @@ -1416,12 +1416,12 @@ def make_node(self, x, index): if isinstance(ind, slice): raise Exception("GetItemScalar called with a slice as index!") - # in case of indexing using int instead of theano variable + # in case of indexing using int instead of aesara variable elif isinstance(ind, int): ind = tt.constant(ind) input_op += [ind] - # in case of indexing using theano variable + # in case of indexing using aesara variable elif ind.ndim == 0: input_op += [ind] else: @@ -1453,7 +1453,7 @@ def perform(self, node, inputs, outputs): Returns ------- -TheanoVariable +AesaraVariable The corresponding item in `x`. Notes @@ -1765,11 +1765,11 @@ def grad(self, inputs, gout): return [x.zeros_like(dtype=config.floatX)] if self.structured: if self.axis is None: - r = gz * theano.sparse.sp_ones_like(x) + r = gz * aesara.sparse.sp_ones_like(x) elif self.axis == 0: - r = col_scale(theano.sparse.sp_ones_like(x), gz) + r = col_scale(aesara.sparse.sp_ones_like(x), gz) elif self.axis == 1: - r = row_scale(theano.sparse.sp_ones_like(x), gz) + r = row_scale(aesara.sparse.sp_ones_like(x), gz) else: raise ValueError("Illegal value for self.axis.") else: @@ -3189,7 +3189,7 @@ def wrapper(*args): return decorator -@structured_monoid(theano.tensor.nnet.sigmoid) +@structured_monoid(aesara.tensor.nnet.sigmoid) def structured_sigmoid(x): """ Structured elemwise sigmoid. 
@@ -4132,7 +4132,7 @@ def make_node(self, x, y): assert y.format in ["csr", "csc"] if x.ndim not in (1, 2): raise TypeError( - "theano.sparse.Dot: input 0 (0-indexed) must have ndim of " + "Input 0 (0-indexed) must have ndim of " f"1 or 2, {int(x.ndim)} given." ) @@ -4144,7 +4144,7 @@ def make_node(self, x, y): assert x.format in ["csr", "csc"] if y.ndim not in (1, 2): raise TypeError( - "theano.sparse.Dot: input 1 (1-indexed) must have ndim of " + "Input 1 (1-indexed) must have ndim of " f"1 or 2, {int(y.ndim)} given." ) @@ -4407,7 +4407,7 @@ def grad(self, inputs, grads): idx_list = inputs[2:] gx = g_output - gy = theano.tensor.subtensor.advanced_subtensor1(g_output, *idx_list) + gy = aesara.tensor.subtensor.advanced_subtensor1(g_output, *idx_list) return [gx, gy] + [DisconnectedType()()] * len(idx_list) diff --git a/theano/sparse/opt.py b/aesara/sparse/opt.py similarity index 99% rename from theano/sparse/opt.py rename to aesara/sparse/opt.py index 3aea29e748..2ee93f7002 100644 --- a/theano/sparse/opt.py +++ b/aesara/sparse/opt.py @@ -1,15 +1,15 @@ import numpy as np import scipy -import theano -import theano.scalar as ts -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.op import COp, _NoPythonCOp -from theano.graph.opt import PatternSub, TopoOptimizer, local_optimizer -from theano.misc.safe_asarray import _asarray -from theano.sparse import basic as sparse -from theano.sparse.basic import ( +import aesara +import aesara.scalar as ts +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.op import COp, _NoPythonCOp +from aesara.graph.opt import PatternSub, TopoOptimizer, local_optimizer +from aesara.misc.safe_asarray import _asarray +from aesara.sparse import basic as sparse +from aesara.sparse.basic import ( CSC, CSR, csm_data, @@ -19,11 +19,11 @@ csm_properties, usmm, ) -from theano.tensor import blas -from theano.tensor.basic import as_tensor_variable, cast, patternbroadcast -from theano.tensor.basic_opt import register_canonicalize, register_specialize -from theano.tensor.math import mul, neg, sub -from theano.tensor.type import TensorType, tensor +from aesara.tensor import blas +from aesara.tensor.basic import as_tensor_variable, cast, patternbroadcast +from aesara.tensor.basic_opt import register_canonicalize, register_specialize +from aesara.tensor.math import mul, neg, sub +from aesara.tensor.type import TensorType, tensor _is_sparse_variable = sparse._is_sparse_variable @@ -72,7 +72,7 @@ def local_inplace_remove0(fgraph, node): return False -theano.compile.optdb.register( +aesara.compile.optdb.register( "local_inplace_remove0", TopoOptimizer(local_inplace_remove0, failure_callback=TopoOptimizer.warn_inplace), 60, @@ -211,7 +211,7 @@ def local_inplace_addsd_ccode(fgraph, node): return False -theano.compile.optdb.register( +aesara.compile.optdb.register( "local_inplace_addsd_ccode", TopoOptimizer( local_inplace_addsd_ccode, failure_callback=TopoOptimizer.warn_inplace @@ -244,7 +244,7 @@ def local_addsd_ccode(fgraph, node): return False -theano.compile.optdb.register( +aesara.compile.optdb.register( "local_addsd_ccode", TopoOptimizer(local_addsd_ccode), # Must be after local_inplace_addsd_ccode at 60 diff --git a/theano/sparse/sandbox/__init__.py b/aesara/sparse/sandbox/__init__.py similarity index 100% rename from theano/sparse/sandbox/__init__.py rename to aesara/sparse/sandbox/__init__.py diff --git a/theano/sparse/sandbox/sp.py b/aesara/sparse/sandbox/sp.py similarity index 97% rename 
from theano/sparse/sandbox/sp.py rename to aesara/sparse/sandbox/sp.py index 4d4c11b4f2..a7dd15264f 100644 --- a/theano/sparse/sandbox/sp.py +++ b/aesara/sparse/sandbox/sp.py @@ -11,19 +11,19 @@ import numpy as np from scipy import sparse as scipy_sparse -import theano -import theano.sparse -from theano import sparse -from theano import tensor as tt -from theano.graph.op import Op -from theano.tensor.math import dot -from theano.tensor.math import max as tt_max -from theano.tensor.shape import reshape -from theano.tensor.subtensor import DimShuffle +import aesara +import aesara.sparse +from aesara import sparse +from aesara import tensor as tt +from aesara.graph.op import Op +from aesara.tensor.math import dot +from aesara.tensor.math import max as tt_max +from aesara.tensor.shape import reshape +from aesara.tensor.subtensor import DimShuffle def register_specialize(lopt, *tags, **kwargs): - theano.compile.optdb["specialize"].register( + aesara.compile.optdb["specialize"].register( (kwargs and kwargs.pop("name")) or lopt.__name__, lopt, "fast_run", *tags ) @@ -349,7 +349,7 @@ def convolve( ) # build sparse matrix, then generate stack of image patches - csc = theano.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr, spmat_shape) + csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr, spmat_shape) patches = (sparse.structured_dot(csc, images.T)).T # compute output of linear classifier @@ -418,7 +418,7 @@ def max_pool(images, imgshp, maxpoolshp): # print 'outshp = ', outshp # build sparse matrix, then generate stack of image patches - csc = theano.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr, spmat_shape) + csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr, spmat_shape) patches = sparse.structured_dot(csc, images.T).T pshape = tt.stack( diff --git a/theano/sparse/sandbox/sp2.py b/aesara/sparse/sandbox/sp2.py similarity index 85% rename from theano/sparse/sandbox/sp2.py rename to aesara/sparse/sandbox/sp2.py index f9b4c9d303..135831e0ee 100644 --- a/theano/sparse/sandbox/sp2.py +++ b/aesara/sparse/sandbox/sp2.py @@ -1,25 +1,25 @@ import numpy as np import scipy.sparse -import theano -from theano import tensor as tt -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.sparse.basic import ( +import aesara +from aesara import tensor as tt +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.sparse.basic import ( Remove0, SparseType, _is_sparse, as_sparse_variable, remove0, ) -from theano.tensor.type import discrete_dtypes, float_dtypes +from aesara.tensor.type import discrete_dtypes, float_dtypes # Probability Ops are currently back in sandbox, because they do not respect -# Theano's Op contract, as their behaviour is not reproducible: calling +# Aesara's Op contract, as their behaviour is not reproducible: calling # the perform() method twice with the same argument will yield different # results. -# from theano.sparse.basic import ( +# from aesara.sparse.basic import ( # Multinomial, multinomial, Poisson, poisson, # Binomial, csr_fbinomial, csc_fbinomial, csr_dbinomial, csc_dbinomial) @@ -36,7 +36,7 @@ class Poisson(Op): WARNING: This Op is NOT deterministic, as calling it twice with the same inputs will NOT give the same result. This is a violation of - Theano's contract for Ops + Aesara's contract for Ops :param x: Sparse matrix. 
@@ -61,9 +61,9 @@ def perform(self, node, inputs, outputs): def grad(self, inputs, outputs_gradients): comment = "No gradient exists for class Poisson in\ - theano/sparse/sandbox/sp2.py" + aesara/sparse/sandbox/sp2.py" return [ - theano.gradient.grad_undefined( + aesara.gradient.grad_undefined( op=self, x_pos=0, x=inputs[0], comment=comment ) ] @@ -82,7 +82,7 @@ class Binomial(Op): WARNING: This Op is NOT deterministic, as calling it twice with the same inputs will NOT give the same result. This is a violation of - Theano's contract for Ops + Aesara's contract for Ops :param n: Tensor scalar representing the number of experiment. :param p: Tensor scalar representing the probability of success. @@ -125,13 +125,13 @@ def grad(self, inputs, gout): (n, p, shape) = inputs (gz,) = gout comment_n = "No gradient exists for the number of samples in class\ - Binomial of theano/sparse/sandbox/sp2.py" + Binomial of aesara/sparse/sandbox/sp2.py" comment_p = "No gradient exists for the prob of success in class\ - Binomial of theano/sparse/sandbox/sp2.py" + Binomial of aesara/sparse/sandbox/sp2.py" return [ - theano.gradient.grad_undefined(op=self, x_pos=0, x=n, comment=comment_n), - theano.gradient.grad_undefined(op=self, x_pos=1, x=p, comment=comment_p), - theano.gradient.disconnected_type(), + aesara.gradient.grad_undefined(op=self, x_pos=0, x=n, comment=comment_n), + aesara.gradient.grad_undefined(op=self, x_pos=1, x=p, comment=comment_p), + aesara.gradient.disconnected_type(), ] def infer_shape(self, fgraph, node, ins_shapes): @@ -151,7 +151,7 @@ class Multinomial(Op): WARNING: This Op is NOT deterministic, as calling it twice with the same inputs will NOT give the same result. This is a violation of - Theano's contract for Ops + Aesara's contract for Ops :param n: Tensor type vector or scalar representing the number of experiment for each row. 
If `n` is a scalar, it will be @@ -201,14 +201,14 @@ def perform(self, node, inputs, outputs): def grad(self, inputs, outputs_gradients): comment_n = "No gradient exists for the number of samples in class\ - Multinomial of theano/sparse/sandbox/sp2.py" + Multinomial of aesara/sparse/sandbox/sp2.py" comment_p = "No gradient exists for the prob of success in class\ - Multinomial of theano/sparse/sandbox/sp2.py" + Multinomial of aesara/sparse/sandbox/sp2.py" return [ - theano.gradient.grad_undefined( + aesara.gradient.grad_undefined( op=self, x_pos=0, x=inputs[0], comment=comment_n ), - theano.gradient.grad_undefined( + aesara.gradient.grad_undefined( op=self, x_pos=1, x=inputs[1], comment=comment_p ), ] diff --git a/theano/sparse/sharedvar.py b/aesara/sparse/sharedvar.py similarity index 88% rename from theano/sparse/sharedvar.py rename to aesara/sparse/sharedvar.py index 546fc67a2b..eb512bd678 100644 --- a/theano/sparse/sharedvar.py +++ b/aesara/sparse/sharedvar.py @@ -2,8 +2,8 @@ import scipy.sparse -from theano.compile import SharedVariable, shared_constructor -from theano.sparse.basic import SparseType, _sparse_py_operators +from aesara.compile import SharedVariable, shared_constructor +from aesara.sparse.basic import SparseType, _sparse_py_operators class SparseTensorSharedVariable(_sparse_py_operators, SharedVariable): diff --git a/theano/sparse/type.py b/aesara/sparse/type.py similarity index 98% rename from theano/sparse/type.py rename to aesara/sparse/type.py index ace75bc772..3d2d2a68ec 100644 --- a/theano/sparse/type.py +++ b/aesara/sparse/type.py @@ -9,8 +9,8 @@ imported_scipy = False -import theano -from theano.graph.type import Type +import aesara +from aesara.graph.type import Type def _is_sparse(x): @@ -210,7 +210,7 @@ def get_size(self, shape_info): # Register SparseType's C code for ViewOp. 
-theano.compile.register_view_op_c_code( +aesara.compile.register_view_op_c_code( SparseType, """ Py_XDECREF(%(oname)s); diff --git a/theano/sparse/utils.py b/aesara/sparse/utils.py similarity index 94% rename from theano/sparse/utils.py rename to aesara/sparse/utils.py index 6e8590133f..2319366c21 100644 --- a/theano/sparse/utils.py +++ b/aesara/sparse/utils.py @@ -1,4 +1,4 @@ -from theano.utils import hash_from_code +from aesara.utils import hash_from_code def hash_from_sparse(data): diff --git a/theano/tensor/__init__.py b/aesara/tensor/__init__.py similarity index 81% rename from theano/tensor/__init__.py rename to aesara/tensor/__init__.py index e58e74b9dc..ab2d0192c8 100644 --- a/theano/tensor/__init__.py +++ b/aesara/tensor/__init__.py @@ -5,10 +5,10 @@ import warnings -import theano.tensor.exceptions -from theano.gradient import consider_constant, grad, hessian, jacobian -from theano.tensor import sharedvar # adds shared-variable constructors -from theano.tensor import ( +import aesara.tensor.exceptions +from aesara.gradient import consider_constant, grad, hessian, jacobian +from aesara.tensor import sharedvar # adds shared-variable constructors +from aesara.tensor import ( basic_opt, blas, blas_c, @@ -18,9 +18,9 @@ opt_uncanonicalize, xlogx, ) -from theano.tensor.basic import * -from theano.tensor.blas import batched_dot, batched_tensordot -from theano.tensor.extra_ops import ( +from aesara.tensor.basic import * +from aesara.tensor.blas import batched_dot, batched_tensordot +from aesara.tensor.extra_ops import ( bartlett, bincount, cumprod, @@ -32,9 +32,9 @@ squeeze, unravel_index, ) -from theano.tensor.io import * -from theano.tensor.math import * -from theano.tensor.shape import ( +from aesara.tensor.io import * +from aesara.tensor.math import * +from aesara.tensor.shape import ( reshape, shape, shape_padaxis, @@ -44,10 +44,10 @@ ) # We import as `_shared` instead of `shared` to avoid confusion between -# `theano.shared` and `tensor._shared`. -from theano.tensor.sort import argsort, argtopk, sort, topk, topk_and_argtopk -from theano.tensor.subtensor import * -from theano.tensor.type import ( +# `aesara.shared` and `tensor._shared`. 
+from aesara.tensor.sort import argsort, argtopk, sort, topk, topk_and_argtopk +from aesara.tensor.subtensor import * +from aesara.tensor.type import ( TensorType, bcol, bmatrix, @@ -203,4 +203,4 @@ ztensor7, zvector, ) -from theano.tensor.type_other import * +from aesara.tensor.type_other import * diff --git a/theano/tensor/basic.py b/aesara/tensor/basic.py similarity index 96% rename from theano/tensor/basic.py rename to aesara/tensor/basic.py index 1c1adbbb6a..18e01c306a 100644 --- a/theano/tensor/basic.py +++ b/aesara/tensor/basic.py @@ -13,22 +13,22 @@ import numpy as np -import theano -import theano.scalar.sharedvar -from theano import compile, config, printing -from theano import scalar as ts -from theano.assert_op import Assert, assert_op -from theano.gradient import DisconnectedType, grad_not_implemented, grad_undefined -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import COp, Op -from theano.graph.params_type import ParamsType -from theano.graph.type import CType -from theano.misc.safe_asarray import _asarray -from theano.printing import min_informative_str, pprint -from theano.scalar import int32 -from theano.tensor.elemwise import DimShuffle, Elemwise, scalar_elemwise -from theano.tensor.exceptions import EmptyConstantError, NotScalarConstantError -from theano.tensor.shape import ( +import aesara +import aesara.scalar.sharedvar +from aesara import compile, config, printing +from aesara import scalar as ts +from aesara.assert_op import Assert, assert_op +from aesara.gradient import DisconnectedType, grad_not_implemented, grad_undefined +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import COp, Op +from aesara.graph.params_type import ParamsType +from aesara.graph.type import CType +from aesara.misc.safe_asarray import _asarray +from aesara.printing import min_informative_str, pprint +from aesara.scalar import int32 +from aesara.tensor.elemwise import DimShuffle, Elemwise, scalar_elemwise +from aesara.tensor.exceptions import EmptyConstantError, NotScalarConstantError +from aesara.tensor.shape import ( Shape, Shape_i, reshape, @@ -36,7 +36,7 @@ shape_padaxis, shape_padleft, ) -from theano.tensor.type import ( +from aesara.tensor.type import ( TensorType, discrete_dtypes, float_dtypes, @@ -48,10 +48,10 @@ uint_dtypes, values_eq_approx_always_true, ) -from theano.tensor.var import TensorConstant, TensorVariable +from aesara.tensor.var import TensorConstant, TensorVariable -_logger = logging.getLogger("theano.tensor.basic") +_logger = logging.getLogger("aesara.tensor.basic") __docformat__ = "restructuredtext en" @@ -193,7 +193,7 @@ def extract_constants(i): "np.array(True) or np.array(False) if you need these constants. " "This error might be caused by using the == operator on " "Variables. v == w does not do what you think it does, " - "use theano.tensor.eq(v, w) instead." + "use aesara.tensor.eq(v, w) instead." 
) return constant(x, name=name, ndim=ndim) @@ -428,7 +428,7 @@ def get_scalar_constant_value( v.owner.op.perform(v.owner, const, ret) return ret[0][0].copy() elif ( - isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) + isinstance(v.owner.op, aesara.tensor.subtensor.Subtensor) and v.ndim == 0 ): if isinstance(v.owner.inputs[0], TensorConstant): @@ -448,7 +448,7 @@ def get_scalar_constant_value( assert len(v.owner.op.idx_list) == v.owner.inputs[0].ndim # Needed to make better graph in this test in - # theano/tensor/tests/test_sharedvar.py: + # aesara/tensor/tests/test_sharedvar.py: # test_shared_options.test_specify_shape_partial if ( v.owner.inputs[0].owner @@ -603,7 +603,7 @@ def grad(self, inp, grads): # If the input dtype is an integer, then so is the output dtype, # and the "zero" gradient can be represented in that int dtype. - # Currently, theano.grad insists that the dtype of the returned + # Currently, aesara.grad insists that the dtype of the returned # gradient has a float dtype, so we use floatX. if s.type.dtype in discrete_dtypes: return [s.zeros_like().astype(config.floatX)] @@ -667,9 +667,9 @@ class Rebroadcast(COp): See Also -------- - unbroadcast - addbroadcast - patternbroadcast + unbroadcast + addbroadcast + patternbroadcast Notes ----- @@ -758,7 +758,7 @@ def grad(self, inp, grads): def infer_shape(self, fgraph, node, ishapes): assert len(ishapes) == 1 l = [] - one = theano.tensor.basic.constant(1) + one = aesara.tensor.basic.constant(1) for ax in range(len(ishapes[0])): if self.axis.get(ax, False): l.append(one) @@ -819,12 +819,12 @@ def c_code_cache_version(self): def register_rebroadcast_c_code(typ, code, version=()): """ - Tell Rebroadcast how to generate C code for a Theano Type. + Tell Rebroadcast how to generate C code for an Aesara Type. - typ : Theano type - It must be the Theano class itself and not an instance of the class. + typ : Aesara type + It must be the Aesara class itself and not an instance of the class. code : C code - That checks if the dimension %(axis)s is of shape 1 for the Theano type + That checks if the dimension %(axis)s is of shape 1 for the Aesara type 'typ'. Use %(iname)s and %(oname)s for the input and output C variable names respectively, and %(axis)s for the axis that we need to check. This code is put in a loop for all axes. @@ -974,7 +974,7 @@ def ones_like(model, dtype=None, opt=False): model : tensor dtype : data-type, optional opt : If True, we will return a constant instead of a graph when possible. - Useful for Theano optimization, not for user building a graph as this + Useful for Aesara optimization, not for user building a graph as this have the consequence that model isn't always in the graph. Returns @@ -997,7 +997,7 @@ def zeros_like(model, dtype=None, opt=False): model : tensor dtype : data-type, optional opt : If True, we will return a constant instead of a graph when possible. - Useful for Theano optimization, not for user building a graph as this + Useful for Aesara optimization, not for user building a graph as this have the consequence that model isn't always in the graph. Returns @@ -1088,7 +1088,7 @@ def nonzero(a, return_matrix=False): is the index of the ith non-zero element of the input array in the jth dimension. - If return_matrix is True (same as Theano Op): + If return_matrix is True (same as Aesara Op): Returns a matrix of shape (ndim, number of nonzero elements) such that element (i,j) is the index in the ith dimension of the jth non-zero element. 
@@ -1563,14 +1563,14 @@ def do_constant_folding(self, fgraph, node): # Not doing the constant folding could also lower # the peak memory usage, as we the "constant" won't # always exists. - theano.tensor.subtensor.IncSubtensor, - theano.tensor.subtensor.AdvancedIncSubtensor1, - theano.tensor.subtensor.AdvancedIncSubtensor, - theano.tensor.blas.Gemv, - theano.tensor.blas_c.CGemv, - theano.tensor.blas.Ger, - theano.tensor.blas_c.CGer, - theano.tensor.blas_scipy.ScipyGer, + aesara.tensor.subtensor.IncSubtensor, + aesara.tensor.subtensor.AdvancedIncSubtensor1, + aesara.tensor.subtensor.AdvancedIncSubtensor, + aesara.tensor.blas.Gemv, + aesara.tensor.blas_c.CGemv, + aesara.tensor.blas.Ger, + aesara.tensor.blas_c.CGer, + aesara.tensor.blas_scipy.ScipyGer, ), ) ): @@ -1633,7 +1633,7 @@ def make_node(self, *inputs): def perform(self, node, inputs, out_): (out,) = out_ - # not calling theano._asarray as optimization + # not calling aesara._asarray as optimization if (out[0] is None) or (out[0].size != len(inputs)): out[0] = _asarray(inputs, dtype=node.outputs[0].dtype) else: @@ -1709,7 +1709,7 @@ def transfer(var, target): Parameters ---------- var : variable - A theano variable + A aesara variable target : str The target of the transfer """ @@ -1765,7 +1765,7 @@ def perform(self, node, inp, out_): x, default = inp (out,) = out_ if x is None: - # why copy? Theano can't yet understand out[0] being a view of + # why copy? Aesara can't yet understand out[0] being a view of # either x or y, so we can be a view of x, but only a copy of y. out[0] = default.copy() else: @@ -1922,7 +1922,7 @@ def infer_shape(self, fgraph, node, in_shapes): out_shapes = [] for i in range(self.len_splits): temp = as_tensor_variable(shp_x) - temp = theano.tensor.subtensor.set_subtensor(temp[axis], splits[i]) + temp = aesara.tensor.subtensor.set_subtensor(temp[axis], splits[i]) temp = [temp[i] for i in range(len(shp_x))] out_shapes.append(temp) return out_shapes @@ -2114,7 +2114,7 @@ def addbroadcast(x, *axes): Parameters ---------- x : tensor_like - Input theano tensor. + Input aesara tensor. axis : an int or an iterable object such as list or tuple of int values The dimension along which the tensor x should be broadcastable. If the length of x along these dimensions is not 1, a ValueError will @@ -2123,11 +2123,11 @@ def addbroadcast(x, *axes): Returns ------- tensor - A theano tensor, which is broadcastable along the specified dimensions. + A aesara tensor, which is broadcastable along the specified dimensions. """ rval = Rebroadcast(*[(axis, True) for axis in axes])(x) - return theano.tensor.basic_opt.apply_rebroadcast_opt(rval) + return aesara.tensor.basic_opt.apply_rebroadcast_opt(rval) def unbroadcast(x, *axes): @@ -2144,7 +2144,7 @@ def unbroadcast(x, *axes): Parameters ---------- x : tensor_like - Input theano tensor. + Input aesara tensor. axis : an int or an iterable object such as list or tuple of int values The dimension along which the tensor x should be unbroadcastable. If the length of x along these dimensions is not 1, a ValueError will @@ -2153,11 +2153,11 @@ def unbroadcast(x, *axes): Returns ------- tensor - A theano tensor, which is unbroadcastable along the specified dimensions. + A aesara tensor, which is unbroadcastable along the specified dimensions. 
""" rval = Rebroadcast(*[(axis, False) for axis in axes])(x) - return theano.tensor.basic_opt.apply_rebroadcast_opt(rval) + return aesara.tensor.basic_opt.apply_rebroadcast_opt(rval) def patternbroadcast(x, broadcastable): @@ -2175,7 +2175,7 @@ def patternbroadcast(x, broadcastable): Parameters ---------- x : tensor_like - Input theano tensor. + Input aesara tensor. broadcastable : an iterable object such as list or tuple of bool values A set of boolean values indicating whether a dimension should be broadcastable or not. If the length of x along these dimensions is @@ -2184,11 +2184,11 @@ def patternbroadcast(x, broadcastable): Returns ------- tensor - A theano tensor, which is unbroadcastable along the specified dimensions. + A aesara tensor, which is unbroadcastable along the specified dimensions. """ rval = Rebroadcast(*[(i, broadcastable[i]) for i in range(len(broadcastable))])(x) - return theano.tensor.basic_opt.apply_rebroadcast_opt(rval) + return aesara.tensor.basic_opt.apply_rebroadcast_opt(rval) class Join(COp): @@ -2479,7 +2479,7 @@ def grad(self, axis_and_tensors, grads): return rval def infer_shape(self, fgraph, node, ishapes): - from theano.tensor.math import eq, ge + from aesara.tensor.math import eq, ge # ishapes[0] contains the size of the axis on which we join # Join op should get at least one input to join @@ -2620,28 +2620,28 @@ def stack(*tensors, **kwargs): Examples -------- - >>> a = theano.tensor.type.scalar() - >>> b = theano.tensor.type.scalar() - >>> c = theano.tensor.type.scalar() - >>> x = theano.tensor.stack([a, b, c]) + >>> a = aesara.tensor.type.scalar() + >>> b = aesara.tensor.type.scalar() + >>> c = aesara.tensor.type.scalar() + >>> x = aesara.tensor.stack([a, b, c]) >>> x.ndim # x is a vector of length 3. 1 - >>> a = theano.tensor.type.tensor4() - >>> b = theano.tensor.type.tensor4() - >>> c = theano.tensor.type.tensor4() - >>> x = theano.tensor.stack([a, b, c]) + >>> a = aesara.tensor.type.tensor4() + >>> b = aesara.tensor.type.tensor4() + >>> c = aesara.tensor.type.tensor4() + >>> x = aesara.tensor.stack([a, b, c]) >>> x.ndim # x is a 5d tensor. 
5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) >>> rval.shape # 3 tensors are stacked on axis 0 (3, 2, 2, 2, 2) - >>> x = theano.tensor.stack([a, b, c], axis=3) + >>> x = aesara.tensor.stack([a, b, c], axis=3) >>> x.ndim 5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) >>> rval.shape # 3 tensors are stacked on axis 3 (2, 2, 2, 3, 2) - >>> x = theano.tensor.stack([a, b, c], axis=-2) + >>> x = aesara.tensor.stack([a, b, c], axis=-2) >>> x.ndim 5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) @@ -2702,7 +2702,7 @@ def stack(*tensors, **kwargs): # in case there is direct int tensors = list(map(as_tensor_variable, tensors)) dtype = ts.upcast(*[i.dtype for i in tensors]) - return theano.tensor.basic_opt.MakeVector(dtype)(*tensors) + return aesara.tensor.basic_opt.MakeVector(dtype)(*tensors) return join(axis, *[shape_padaxis(t, axis) for t in tensors]) @@ -2756,11 +2756,11 @@ def get_vector_length(v): raise TypeError(f"argument must be symbolic vector, got '{v}'") if v.type.broadcastable[0]: return 1 - if isinstance(v, theano.tensor.sharedvar.TensorSharedVariable) and v.type.ndim == 1: + if isinstance(v, aesara.tensor.sharedvar.TensorSharedVariable) and v.type.ndim == 1: return len(v.get_value()) if isinstance(v, Constant) and v.type.ndim == 1: return len(v.data) - if v.owner and isinstance(v.owner.op, theano.tensor.basic_opt.MakeVector): + if v.owner and isinstance(v.owner.op, aesara.tensor.basic_opt.MakeVector): return len(v.owner.inputs) if v.owner and isinstance(v.owner.op, Shape): return v.owner.inputs[0].type.ndim @@ -2791,11 +2791,11 @@ def get_vector_length(v): # TODO: We can cover more `*Subtensor` cases. if ( v.owner - and isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) + and isinstance(v.owner.op, aesara.tensor.subtensor.Subtensor) and isinstance(v.owner.op.idx_list[0], slice) ): try: - indices = theano.tensor.subtensor.get_idx_list( + indices = aesara.tensor.subtensor.get_idx_list( v.owner.inputs, v.owner.op.idx_list ) start = ( @@ -2911,7 +2911,7 @@ def perform(self, node, inp, out_): out[0] = x.reshape(newshape) def infer_shape(self, fgraph, node, in_shapes): - from theano.tensor.math import prod + from aesara.tensor.math import prod (in_shp,) = in_shapes part1 = in_shp[: self.ndim - 1] @@ -3018,8 +3018,8 @@ def is_flat(var, ndim=None, outdim=None): Parameters ---------- - var : theano.tensor.var.TensorVariable - the theano var on which the dimensionality is checked. + var : aesara.tensor.var.TensorVariable + the aesara var on which the dimensionality is checked. outdim : int the expected dimensionality of var. @@ -3049,7 +3049,7 @@ def flatten(x, ndim=1): Parameters ---------- - x : theano.tensor.var.TensorVariable + x : aesara.tensor.var.TensorVariable The variable to be reshaped. ndim : int The number of dimensions of the returned variable @@ -3057,7 +3057,7 @@ def flatten(x, ndim=1): Returns ------- - theano.tensor.var.TensorVariable + aesara.tensor.var.TensorVariable the flattend variable with dimensionality of outdim """ if ndim is None: @@ -3172,7 +3172,7 @@ def tile(x, reps, ndim=None): be provided. 
""" - from theano.tensor.math import ge + from aesara.tensor.math import ge if ndim is not None and ndim < x.ndim: raise ValueError("ndim should be equal or larger than x.ndim") @@ -3265,7 +3265,7 @@ def make_node(self, start, stop, step): @config.change_flags(warn_float64="ignore") def infer_shape(self, fgraph, node, i_shapes): - from theano.tensor.math import ceil, maximum + from aesara.tensor.math import ceil, maximum # Note start, stop and step can be float numbers. start, stop, step = node.inputs @@ -3619,7 +3619,7 @@ def perform(self, node, inp, out): self._rec_perform(node, x, y, inverse, outs[0], curdim=0) def infer_shape(self, fgraph, node, in_shapes): - from theano.tensor.math import maximum + from aesara.tensor.math import maximum shp_x = in_shapes[0] shp_y = in_shapes[1] @@ -3630,7 +3630,7 @@ def infer_shape(self, fgraph, node, in_shapes): return [out_shape] def grad(self, inp, grads): - from theano.tensor.math import Sum, eq + from aesara.tensor.math import Sum, eq x, y, inverse = inp (gz,) = grads @@ -3788,7 +3788,7 @@ def grad(self, inputs, gout): x = zeros_like(x) xdiag = AllocDiag(offset=self.offset)(gz) return [ - theano.tensor.subtensor.set_subtensor( + aesara.tensor.subtensor.set_subtensor( x[: xdiag.shape[0], : xdiag.shape[1]], xdiag ) ] @@ -3797,7 +3797,7 @@ def grad(self, inputs, gout): return [grad_not_implemented(self, 0, x)] def infer_shape(self, fgraph, node, shapes): - from theano.tensor.math import clip, minimum + from aesara.tensor.math import clip, minimum (in_shape,) = shapes dim1 = in_shape[self.axis1] @@ -4011,9 +4011,9 @@ def stacklists(arg): Examples -------- - >>> from theano.tensor import stacklists - >>> from theano.tensor.type import scalars, matrices - >>> from theano import function + >>> from aesara.tensor import stacklists + >>> from aesara.tensor.type import scalars, matrices + >>> from aesara import function >>> a, b, c, d = scalars('abcd') >>> X = stacklists([[a, b], [c, d]]) >>> f = function([a, b, c, d], X) @@ -4127,7 +4127,7 @@ def __init__(self, mode): def infer_shape(self, fgraph, node, shapes): a_shape, choices_shape = shapes - out_shape = theano.tensor.extra_ops.broadcast_shape( + out_shape = aesara.tensor.extra_ops.broadcast_shape( a_shape, choices_shape[1:], arrays_are_shapes=True ) @@ -4136,7 +4136,7 @@ def infer_shape(self, fgraph, node, shapes): def make_node(self, a, choices): # Import here as it isn't imported by default and we can't # import at the top as it would cause circular import. 
- import theano.typed_list + import aesara.typed_list a = as_tensor_variable(a) if a.dtype not in discrete_dtypes: @@ -4147,7 +4147,7 @@ def make_node(self, a, choices): # Only use make_list if choices have inconsistent shapes # otherwise use as_tensor_variable if isinstance(choices, (tuple, list)): - choice = theano.typed_list.make_list(choices) + choice = aesara.typed_list.make_list(choices) else: choice = as_tensor_variable(choices) (out_shape,) = self.infer_shape( @@ -4157,7 +4157,7 @@ def make_node(self, a, choices): bcast = [] for s in out_shape: try: - s_val = theano.get_scalar_constant_value(s) + s_val = aesara.get_scalar_constant_value(s) except (NotScalarConstantError, AttributeError): s_val = None diff --git a/theano/tensor/basic_opt.py b/aesara/tensor/basic_opt.py similarity index 98% rename from theano/tensor/basic_opt.py rename to aesara/tensor/basic_opt.py index bdcb8e0239..5e5f64b142 100644 --- a/theano/tensor/basic_opt.py +++ b/aesara/tensor/basic_opt.py @@ -12,23 +12,23 @@ import numpy as np -import theano -import theano.scalar.basic as ts -from theano import compile -from theano.assert_op import Assert, assert_op -from theano.compile.ops import ViewOp -from theano.configdefaults import config -from theano.graph import toolbox -from theano.graph.basic import ( +import aesara +import aesara.scalar.basic as ts +from aesara import compile +from aesara.assert_op import Assert, assert_op +from aesara.compile.ops import ViewOp +from aesara.configdefaults import config +from aesara.graph import toolbox +from aesara.graph.basic import ( Constant, Variable, ancestors, equal_computations, io_toposort, ) -from theano.graph.fg import InconsistencyError -from theano.graph.op import get_test_value -from theano.graph.opt import ( +from aesara.graph.fg import InconsistencyError +from aesara.graph.op import get_test_value +from aesara.graph.opt import ( GlobalOptimizer, OpRemove, TopoOptimizer, @@ -37,14 +37,14 @@ in2out, local_optimizer, ) -from theano.graph.optdb import SequenceDB -from theano.graph.utils import ( +from aesara.graph.optdb import SequenceDB +from aesara.graph.utils import ( MethodNotDefined, TestValueError, get_variable_trace_string, ) -from theano.printing import pprint -from theano.tensor.basic import ( +from aesara.printing import pprint +from aesara.tensor.basic import ( Alloc, AllocEmpty, ARange, @@ -74,12 +74,12 @@ zeros, zeros_like, ) -from theano.tensor.elemwise import DimShuffle, Elemwise -from theano.tensor.exceptions import NotScalarConstantError, ShapeError -from theano.tensor.extra_ops import broadcast_shape -from theano.tensor.math import Dot, add -from theano.tensor.math import all as tt_all -from theano.tensor.math import ( +from aesara.tensor.elemwise import DimShuffle, Elemwise +from aesara.tensor.exceptions import NotScalarConstantError, ShapeError +from aesara.tensor.extra_ops import broadcast_shape +from aesara.tensor.math import Dot, add +from aesara.tensor.math import all as tt_all +from aesara.tensor.math import ( and_, ceil_intdiv, dot, @@ -92,9 +92,9 @@ minimum, or_, ) -from theano.tensor.shape import Reshape, Shape, Shape_i, shape, shape_padleft -from theano.tensor.sort import TopKOp -from theano.tensor.subtensor import ( +from aesara.tensor.shape import Reshape, Shape, Shape_i, shape, shape_padleft +from aesara.tensor.sort import TopKOp +from aesara.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, AdvancedSubtensor1, @@ -107,12 +107,12 @@ get_canonical_form_slice, get_idx_list, ) -from theano.tensor.type import 
TensorType, discrete_dtypes, integer_dtypes, lscalar -from theano.tensor.var import TensorConstant -from theano.utils import NoDuplicateOptWarningFilter +from aesara.tensor.type import TensorType, discrete_dtypes, integer_dtypes, lscalar +from aesara.tensor.var import TensorConstant +from aesara.utils import NoDuplicateOptWarningFilter -_logger = logging.getLogger("theano.tensor.basic_opt") +_logger = logging.getLogger("aesara.tensor.basic_opt") _logger.addFilter(NoDuplicateOptWarningFilter()) @@ -224,7 +224,7 @@ def __init__(self, OP): self.op = OP def add_requirements(self, fgraph): - from theano.graph.destroyhandler import DestroyHandler + from aesara.graph.destroyhandler import DestroyHandler fgraph.attach_feature(DestroyHandler()) @@ -265,7 +265,7 @@ def apply(self, fgraph): """ # We should not validate too often as this takes too much time to # execute! - # It is the _dfs_toposort() fct in theano/graph/destroyhandler.py + # It is the _dfs_toposort() fct in aesara/graph/destroyhandler.py # that takes so much time. # Should we try to use another lib that does toposort? # igraph: http://igraph.sourceforge.net/ @@ -275,7 +275,7 @@ def apply(self, fgraph): # deque class too? # And init the deque and other list to an upper bound number of # elements? - # Maybe Theano should do online toposort as in + # Maybe Aesara should do online toposort as in # http://code.google.com/p/acyclic # # The next longest optimizer is the canonizer phase. @@ -310,7 +310,7 @@ def apply(self, fgraph): protected_inputs = [ f.protected for f in fgraph._features - if isinstance(f, theano.compile.function.types.Supervisor) + if isinstance(f, aesara.compile.function.types.Supervisor) ] protected_inputs = sum(protected_inputs, []) # flatten the list protected_inputs.extend(fgraph.outputs) @@ -880,7 +880,7 @@ class ShapeFeature(toolbox.Feature): elements are either integers or symbolic integers. TODO: check to see if the symbols are necessarily - non-constant... or are integer literals sometimes Theano + non-constant... or are integer literals sometimes Aesara constants?? That would be confusing. """ @@ -1102,7 +1102,7 @@ def set_shape(self, r, s, override=False): if r.ndim != len(s): sio = StringIO() - theano.printing.debugprint(r, file=sio, print_type=True) + aesara.printing.debugprint(r, file=sio, print_type=True) raise AssertionError( f"Something inferred a shape with {len(s)} dimensions " f"for a variable with {int(r.ndim)} dimensions" @@ -1461,7 +1461,7 @@ def same_shape(self, x, y, dim_x=None, dim_y=None): if dx.owner.inputs[0] == dy.owner.inputs[0]: continue # To be sure to cover all case, call equal_computation. - # Can't use theano.graph.basic.is_same_graph(dx, dy) + # Can't use aesara.graph.basic.is_same_graph(dx, dy) # As it currently expect that dx and dy aren't in a FunctionGraph if not equal_computations([dx], [dy]): return False @@ -1489,13 +1489,13 @@ def apply(self, fgraph): # Register it after merge1 optimization at 0. We don't want to track # the shape of merged node. -theano.compile.mode.optdb.register( +aesara.compile.mode.optdb.register( "ShapeOpt", ShapeOptimizer(), 0.1, "fast_run", "fast_compile" ) # Not enabled by default for now. Some crossentropy opt use the # shape_feature. They are at step 2.01. uncanonicalize is at step # 3. After it goes to 48.5 that move to the gpu. So 10 seem resonable. 
-theano.compile.mode.optdb.register("UnShapeOpt", UnShapeOptimizer(), 10) +aesara.compile.mode.optdb.register("UnShapeOpt", UnShapeOptimizer(), 10) def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP): @@ -1773,7 +1773,7 @@ def local_fill_to_alloc(fgraph, node): node.outputs[0].type, "node", node, - ) # theano.printing.debugprint(node.outputs[0], file='str')) + ) # aesara.printing.debugprint(node.outputs[0], file='str')) return rval @@ -1896,7 +1896,7 @@ def local_alloc_empty_to_zeros(fgraph, node): """This convert AllocEmpty to Alloc of 0. This help investigate NaN with NanGuardMode. Not registered by - default. To activate it, use the Theano flag + default. To activate it, use the Aesara flag optimizer_including=alloc_empty_to_zeros. This also enable the GPU version of this optimizations. @@ -2292,7 +2292,7 @@ def local_cast_cast(fgraph, node): def is_an_upcast(type1, type2): """Given two data types (as strings), check if converting to type2 from type1 constitutes an upcast. - Differs from theano.scalar.upcast + Differs from aesara.scalar.upcast """ category = { @@ -2908,9 +2908,9 @@ def merge_two_slices(fgraph, slice1, len1, slice2, len2): n_val = sl1.stop - 1 - sl2 * sl1.step if config.warn__subtensor_merge_bug: warnings.warning( - "Your current code is fine, but Theano versions " + "Your current code is fine, but Aesara versions " "prior to 0.5rc2 might have given an incorrect result. " - "To disable this warning, set the Theano flag " + "To disable this warning, set the Aesara flag " "warn__subtensor_merge_bug to False." ) # we need to pick either n_val or p_val and then follow same @@ -3483,7 +3483,7 @@ def local_adv_sub1_adv_inc_sub1(fgraph, node): if not inp.owner.op.set_instead_of_inc: if config.warn__inc_subtensor1_opt: warnings.warning( - "Your current code is fine, but Theano versions " + "Your current code is fine, but Aesara versions " "between 0.7rc1 and 0.10 (or development versions " "between Nov. 2014 and May 2017) " "might have given incorrect results. This graph has " @@ -3491,7 +3491,7 @@ def local_adv_sub1_adv_inc_sub1(fgraph, node): "where idx is an array of integers. This used to be " 'optimized to "x", which is incorrect if there are ' "duplicated indices in idx. " - "To disable this warning, set the Theano flag " + "To disable this warning, set the Aesara flag " "warn__inc_subtensor1_opt to False." ) return @@ -4426,7 +4426,7 @@ def local_fuse(fgraph, node): For mixed dtype, we let the `Composite` `Op` do the cast. It lets the C compiler do the cast. - The number of dimensions is validated at call time by Theano itself. + The number of dimensions is validated at call time by Aesara itself. """ # META TODO: PUT THESE THINGS IN TRAC, NOT TODO NOTES!! @@ -4851,7 +4851,7 @@ def local_merge_alloc(fgraph, node): dims_outer[-1 - i] = Assert( "You have a shape error in your graph. To see a better" " error message and a stack trace of where in your code" - " the error is created, use the Theano flags" + " the error is created, use the Aesara flags" " optimizer=None or optimizer=fast_compile." )(dim_outer, eq(dim_outer, dim_inner)) i += 1 diff --git a/theano/tensor/blas.py b/aesara/tensor/blas.py similarity index 96% rename from theano/tensor/blas.py rename to aesara/tensor/blas.py index a9b856f7a1..7078933a13 100644 --- a/theano/tensor/blas.py +++ b/aesara/tensor/blas.py @@ -6,16 +6,16 @@ The standard BLAS libraries implement what is called "legacy BLAS" in that document. -This documentation describes Theano's BLAS optimization pipeline. 
+This documentation describes Aesara's BLAS optimization pipeline. Where there is a discrepancy between how things do work and how they *should* work, both aspects should be documented. -There are four kinds of BLAS Ops in Theano: +There are four kinds of BLAS Ops in Aesara: - Python implementations (this file) - SciPy-based (blas_scipy) - C-based (blas_c) - - GPU-based (theano.gpuarray) + - GPU-based (aesara.gpuarray) Notes ----- @@ -141,36 +141,36 @@ from functools import reduce -import theano.scalar -from theano.compile.mode import optdb -from theano.configdefaults import config -from theano.graph.basic import Apply, view_roots -from theano.graph.fg import InconsistencyError -from theano.graph.op import COp, Op -from theano.graph.opt import ( +import aesara.scalar +from aesara.compile.mode import optdb +from aesara.configdefaults import config +from aesara.graph.basic import Apply, view_roots +from aesara.graph.fg import InconsistencyError +from aesara.graph.op import COp, Op +from aesara.graph.opt import ( EquilibriumOptimizer, GlobalOptimizer, in2out, inherit_stack_trace, local_optimizer, ) -from theano.graph.optdb import SequenceDB -from theano.graph.params_type import ParamsType -from theano.graph.toolbox import ReplacementDidNotRemoveError, ReplaceValidate -from theano.graph.utils import MethodNotDefined, TestValueError -from theano.printing import FunctionPrinter, debugprint, pprint -from theano.scalar import bool as bool_t -from theano.tensor import basic as tt -from theano.tensor.basic_opt import local_dimshuffle_lift -from theano.tensor.blas_headers import blas_header_text, blas_header_version -from theano.tensor.elemwise import DimShuffle, Elemwise -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import Dot, add, mul, neg, sub -from theano.tensor.type import integer_dtypes, tensor, values_eq_approx_remove_inf_nan -from theano.utils import memoize - - -_logger = logging.getLogger("theano.tensor.blas") +from aesara.graph.optdb import SequenceDB +from aesara.graph.params_type import ParamsType +from aesara.graph.toolbox import ReplacementDidNotRemoveError, ReplaceValidate +from aesara.graph.utils import MethodNotDefined, TestValueError +from aesara.printing import FunctionPrinter, debugprint, pprint +from aesara.scalar import bool as bool_t +from aesara.tensor import basic as tt +from aesara.tensor.basic_opt import local_dimshuffle_lift +from aesara.tensor.blas_headers import blas_header_text, blas_header_version +from aesara.tensor.elemwise import DimShuffle, Elemwise +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import Dot, add, mul, neg, sub +from aesara.tensor.type import integer_dtypes, tensor, values_eq_approx_remove_inf_nan +from aesara.utils import memoize + + +_logger = logging.getLogger("aesara.tensor.blas") try: import scipy.linalg.blas @@ -197,7 +197,7 @@ if not config.blas__ldflags: _logger.warning( "Failed to import scipy.linalg.blas, and " - "Theano flag blas__ldflags is empty. " + "Aesara flag blas__ldflags is empty. " "Falling back on slower implementations for " "dot(matrix, vector), dot(vector, matrix) and " f"dot(vector, vector) ({str(e)})" @@ -852,7 +852,7 @@ class Gemm(GemmRelated): argument. Because of this in-place computation, an L{Apply} of this op will destroy the L{Variable} z on which it operates. (See L{DestructiveOps} for an explanation of what destroying means in - the context of theano graphs. See L{BlasLapackSupport} for more + the context of aesara graphs. 
See L{BlasLapackSupport} for more optimized linear algebra operations.) """ @@ -910,7 +910,7 @@ def make_node(self, *inputs): # declare to be inplace only on z. So to make it safe, we # raise an error if z can be a view on x or y. - # I don't know if Theano currently can support that case. As + # I don't know if Aesara currently can support that case. As # this case don't happen in our code, I won't spent time # investigating this. So the assert is for safety. I also # think there is another mechanism that would prevent this, @@ -1095,7 +1095,7 @@ def c_code_cache_version(self): gemm_inplace = Gemm(inplace=True) gemm_no_inplace = Gemm(inplace=False) -# For the user interface. Theano optimization will make them inplace +# For the user interface. Aesara optimization will make them inplace gemm = gemm_no_inplace pprint.assign(gemm_inplace, FunctionPrinter("gemm_inplace")) pprint.assign(gemm_no_inplace, FunctionPrinter("gemm_no_inplace")) @@ -1128,7 +1128,7 @@ def _as_scalar(res, dtype=None): # This is valid when res is a scalar used as input to a dot22 # as the cast of the scalar can be done before or after the dot22 # and this will give the same result. - if theano.scalar.upcast(res.dtype, dtype) == dtype: + if aesara.scalar.upcast(res.dtype, dtype) == dtype: return tt.cast(rval, dtype) else: return None @@ -1300,7 +1300,7 @@ def _factor_canonicalized(lst): # t = (t,) # for e in t: # try: - # theano.printing.debugprint(e) + # aesara.printing.debugprint(e) # except TypeError: # print e, type(e) i = 0 @@ -1339,11 +1339,11 @@ def _gemm_from_factored_list(fgraph, lst): # This can happen when we try to cast a complex to a real for sM in lst: # Make every pair in list have matching dtypes - # sM can be a tuple of 2 elements or a theano variable. + # sM can be a tuple of 2 elements or an Aesara variable. 
if isinstance(sM, tuple): sm0, sm1 = sM sm0 = tt.as_tensor_variable(sm0) - if theano.scalar.upcast(sm0.dtype, sm1.dtype) == sm1.dtype: + if aesara.scalar.upcast(sm0.dtype, sm1.dtype) == sm1.dtype: lst2.append((tt.cast(sm0, sm1.dtype), sM[1])) lst = lst2 @@ -1456,12 +1456,12 @@ def on_import(new_node): if new_node is not node: nodelist.append(new_node) - u = theano.graph.opt.Updater(on_import, None, None, name="GemmOptimizer") + u = aesara.graph.opt.Updater(on_import, None, None, name="GemmOptimizer") fgraph.attach_feature(u) while did_something: nb_iter += 1 t0 = time.time() - nodelist = theano.graph.basic.io_toposort(fgraph.inputs, fgraph.outputs) + nodelist = aesara.graph.basic.io_toposort(fgraph.inputs, fgraph.outputs) time_toposort += time.time() - t0 did_something = False nodelist.reverse() @@ -1471,10 +1471,10 @@ def on_import(new_node): and isinstance( node.op.scalar_op, ( - theano.scalar.Add, - theano.scalar.Sub, - theano.scalar.Neg, - theano.scalar.Mul, + aesara.scalar.Add, + aesara.scalar.Sub, + aesara.scalar.Neg, + aesara.scalar.Mul, ), ) ): @@ -1770,7 +1770,7 @@ def local_dot22_to_ger_or_gemv(fgraph, node): return [rval] if xb[0] and yb[1]: # x and y are both vectors so this qualifies for a sdot / ddot - # TODO: Theano doesn't have a sdot, but gemv is better than _dot22 + # TODO: Aesara doesn't have a sdot, but gemv is better than _dot22 xv = x.dimshuffle(1) zeros = tt.AllocEmpty(x.dtype)(1) rval = gemv_no_inplace(zeros, one, y.T, xv, zero) @@ -1987,7 +1987,7 @@ def local_dot22_to_dot22scalar(fgraph, node): scalar_idx = -1 for i, x in enumerate(m.owner.inputs): if _as_scalar(x, dtype=d.dtype) and ( - theano.scalar.upcast(x.type.dtype, d.type.dtype) == d.type.dtype + aesara.scalar.upcast(x.type.dtype, d.type.dtype) == d.type.dtype ): scalar_idx = i break @@ -2021,7 +2021,7 @@ def local_dot22_to_dot22scalar(fgraph, node): if ( i != dot22_idx and i_scalar[i] is not None - and (theano.scalar.upcast(x.type.dtype, d.type.dtype) == d.type.dtype) + and (aesara.scalar.upcast(x.type.dtype, d.type.dtype) == d.type.dtype) ): scalar_idx = i break @@ -2065,24 +2065,21 @@ def make_node(self, *inputs): inputs = list(map(tt.as_tensor_variable, inputs)) if len(inputs) != 2: - raise TypeError( - "theano.tensor.blas.BatchedDot: 2 arguments" - f" required, {len(inputs)} given " - ) + raise TypeError(f"Two arguments required, but {len(inputs)} given.") if inputs[0].ndim not in (2, 3): raise TypeError( - "theano.tensor.blas.BatchedDot: input 0 (0-indexed)" + "Input 0 (0-indexed)" f" must have ndim of 2 or 3, {int(inputs[0].ndim)} given. Consider" - " calling theano.tensor.blas.batched_dot instead." + " calling batched_dot instead." ) if inputs[1].ndim not in (2, 3): raise TypeError( - "theano.tensor.blas.BatchedDot: input 1 (0-indexed)" + "Input 1 (0-indexed)" f" must have ndim of 2 or 3, {int(inputs[1].ndim)} given. Consider" - " calling theano.tensor.blas.batched_dot instead." + " calling batched_dot instead." ) - dtype = theano.scalar.upcast(*[input.type.dtype for input in inputs]) + dtype = aesara.scalar.upcast(*[input.type.dtype for input in inputs]) # upcast inputs to common dtype if needed upcasted_inputs = [tt.cast(input, dtype) for input in inputs] broadcastable = ( @@ -2098,7 +2095,7 @@ def perform(self, node, inp, out): if x.shape[0] != y.shape[0]: raise TypeError( - f"theano.tensor.blas.BatchedDot: inputs [{', '.join(map(str, inp))}] must have the" + f"Inputs [{', '.join(map(str, inp))}] must have the" f" same size in axis 0, but have sizes [{', '.join([str(i.shape[0]) for i in inp])}]." 
) @@ -2410,7 +2407,7 @@ def c_dimshuffle(newname, oldname, shape): ) def c_code_cache_version(self): - from theano.tensor.blas_headers import blas_header_version + from aesara.tensor.blas_headers import blas_header_version return (4, blas_header_version()) @@ -2463,35 +2460,35 @@ def R_op(self, inputs, eval_points): if test_values_enabled: try: - iv0 = theano.graph.op.get_test_value(inputs[0]) + iv0 = aesara.graph.op.get_test_value(inputs[0]) except TestValueError: - theano.graph.op.missing_test_message( + aesara.graph.op.missing_test_message( "first input passed to BatchedDot.R_op has no test value" ) test_values_enabled = False try: - iv1 = theano.graph.op.get_test_value(inputs[1]) + iv1 = aesara.graph.op.get_test_value(inputs[1]) except TestValueError: - theano.graph.op.missing_test_message( + aesara.graph.op.missing_test_message( "second input passed to BatchedDot.R_op has no test value" ) test_values_enabled = False if eval_points[0]: try: - ev0 = theano.graph.op.get_test_value(eval_points[0]) + ev0 = aesara.graph.op.get_test_value(eval_points[0]) except TestValueError: - theano.graph.op.missing_test_message( + aesara.graph.op.missing_test_message( "first eval point passed to BatchedDot.R_op " "has no test value" ) test_values_enabled = False if eval_points[1]: try: - ev1 = theano.graph.op.get_test_value(eval_points[1]) + ev1 = aesara.graph.op.get_test_value(eval_points[1]) except TestValueError: - theano.graph.op.missing_test_message( + aesara.graph.op.missing_test_message( "second eval point passed to BatchedDot.R_op " "has no test value" ) @@ -2556,12 +2553,12 @@ def batched_dot(a, b): following sequence: 1. If either a or b is a vector, it returns the batched elementwise - product without calling the Theano BatchedDot op. + product without calling the Aesara BatchedDot op. - 2. If both a and b have either 2 or 3 dimensions, it calls Theano's + 2. If both a and b have either 2 or 3 dimensions, it calls Aesara's BatchedDot op on a and b. - 3. If either a or b has more than 3 dimensions, it calls Theano's + 3. If either a or b has more than 3 dimensions, it calls Aesara's batched_tensordot function with appropriate axes. The batched_tensordot function expresses high-dimensional batched dot products in terms of batched matrix-matrix dot products, so @@ -2619,6 +2616,6 @@ def batched_tensordot(x, y, axes=2): reshapes to reduce the tensor dot product to a matrix or vector dot product. Finally, it calls batched_dot to compute the result. 
""" - from theano.tensor.math import _tensordot_as_dot + from aesara.tensor.math import _tensordot_as_dot return _tensordot_as_dot(x, y, axes, dot=batched_dot, batched=True) diff --git a/theano/tensor/blas_c.py b/aesara/tensor/blas_c.py similarity index 98% rename from theano/tensor/blas_c.py rename to aesara/tensor/blas_c.py index 16875e2362..acd8e6bda7 100644 --- a/theano/tensor/blas_c.py +++ b/aesara/tensor/blas_c.py @@ -1,14 +1,14 @@ -# import theano.tensor.basic as tt +# import aesara.tensor.basic as tt -from theano.configdefaults import config -from theano.graph.op import COp -from theano.graph.opt import in2out -from theano.graph.params_type import ParamsType -from theano.scalar import bool as bool_t +from aesara.configdefaults import config +from aesara.graph.op import COp +from aesara.graph.opt import in2out +from aesara.graph.params_type import ParamsType +from aesara.scalar import bool as bool_t -# Work-around for Python 3.6 issue that prevents `import theano.tensor as tt` -from theano.tensor import basic as tt -from theano.tensor.blas import ( +# Work-around for Python 3.6 issue that prevents `import aesara.tensor as tt` +from aesara.tensor import basic as tt +from aesara.tensor.blas import ( Gemv, Ger, blas_header_text, @@ -646,7 +646,7 @@ def c_code_cache_version(self): def check_force_gemv_init(): if check_force_gemv_init._force_init_beta is None: - from theano.link.c.cmodule import GCC_compiler + from aesara.link.c.cmodule import GCC_compiler """ Test issue 1569. diff --git a/theano/tensor/blas_headers.py b/aesara/tensor/blas_headers.py similarity index 99% rename from theano/tensor/blas_headers.py rename to aesara/tensor/blas_headers.py index 716f13d21d..93ef874d4d 100644 --- a/theano/tensor/blas_headers.py +++ b/aesara/tensor/blas_headers.py @@ -11,18 +11,18 @@ import textwrap from os.path import dirname -from theano.configdefaults import config -from theano.link.c.cmodule import GCC_compiler +from aesara.configdefaults import config +from aesara.link.c.cmodule import GCC_compiler -_logger = logging.getLogger("theano.tensor.blas") +_logger = logging.getLogger("aesara.tensor.blas") def detect_macos_sdot_bug(): """ Try to detect a bug in the default BLAS in MacOS. - The problem in Theano has been reported in gh-1240, + The problem in Aesara has been reported in gh-1240, the underlying bug has been confirmed in http://www.macresearch.org/lapackblas-fortran-106#comment-17227. 
@@ -1002,7 +1002,6 @@ def blas_header_text(): "FATAL: The implementation of BLAS SDOT " "routine in your system has a bug that " "makes it return wrong results.\\n" - "Please contact theano-dev@groups.google.com.\\n" "You can work around this bug by using a " "different BLAS library, or disabling BLAS\\n"); assert(0); diff --git a/theano/tensor/blas_scipy.py b/aesara/tensor/blas_scipy.py similarity index 96% rename from theano/tensor/blas_scipy.py rename to aesara/tensor/blas_scipy.py index 717d714fba..b23b4460c2 100644 --- a/theano/tensor/blas_scipy.py +++ b/aesara/tensor/blas_scipy.py @@ -4,8 +4,8 @@ import numpy as np -from theano.graph.opt import in2out -from theano.tensor.blas import ( +from aesara.graph.opt import in2out +from aesara.tensor.blas import ( Ger, blas_optdb, ger, @@ -17,7 +17,7 @@ if have_fblas: - from theano.tensor.blas import fblas + from aesara.tensor.blas import fblas _blas_ger_fns = { np.dtype("float32"): fblas.sger, diff --git a/theano/tensor/c_code/alt_blas_common.h b/aesara/tensor/c_code/alt_blas_common.h similarity index 91% rename from theano/tensor/c_code/alt_blas_common.h rename to aesara/tensor/c_code/alt_blas_common.h index 5c74a08120..0593c12e67 100644 --- a/theano/tensor/c_code/alt_blas_common.h +++ b/aesara/tensor/c_code/alt_blas_common.h @@ -1,5 +1,5 @@ -/** C Implementation (with NumPy back-end) of BLAS functions used in Theano. - * Used instead of BLAS when Theano flag ``blas__ldflags`` is empty. +/** C Implementation (with NumPy back-end) of BLAS functions used in Aesara. + * Used instead of BLAS when Aesara flag ``blas__ldflags`` is empty. * This file contains some useful header code not templated. * File alt_blas_template.c currently contains template code for: * - [sd]gemm_ diff --git a/theano/tensor/c_code/alt_blas_template.c b/aesara/tensor/c_code/alt_blas_template.c similarity index 99% rename from theano/tensor/c_code/alt_blas_template.c rename to aesara/tensor/c_code/alt_blas_template.c index 443d3e9285..f8ed541426 100644 --- a/theano/tensor/c_code/alt_blas_template.c +++ b/aesara/tensor/c_code/alt_blas_template.c @@ -1,4 +1,4 @@ -/** Alternative template NumPy-based implementation of BLAS functions used in Theano. **/ +/** Alternative template NumPy-based implementation of BLAS functions used in Aesara. **/ /* Compute matrix[i][j] = scalar for every position (i, j) in matrix. */ void alt_numpy_memset_inplace_%(float_type)s(PyArrayObject* matrix, const %(float_type)s* scalar) { @@ -277,7 +277,7 @@ void %(precision)sgemv_( alt_fatal_error("NumPy %(precision)sgemv_ implementation: the output vector should be empty."); return; } - /* Vector pointers points to the beginning of memory (see function `theano.tensor.blas_c.gemv_c_code`). + /* Vector pointers points to the beginning of memory (see function `aesara.tensor.blas_c.gemv_c_code`). * NumPy seems to expect that pointers points to the first element of the array. */ if (*incx < 0) x += (size_x - 1) * (-*incx); @@ -338,7 +338,7 @@ void %(precision)sgemv_( alt_fatal_error("NumPy %(precision)sdot_ implementation: INCX and INCY must not be 0."); %(float_type)s result = 0; int one = 1; - /* Vector pointers points to the beginning of memory (see function `theano.tensor.blas_c.gemv_c_code`). + /* Vector pointers points to the beginning of memory (see function `aesara.tensor.blas_c.gemv_c_code`). * NumPy seems to expect that pointers points to the first element of the array. 
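For readers unfamiliar with the BLAS convention this template emulates, a pure-NumPy reference of what ``gemv`` computes, including the pointer adjustment for negative increments discussed in the comments above, may help. This is an illustration only (restricted to increments of ±1), not code from the diff.

# Reference semantics: y <- alpha * A @ x + beta * y, BLAS-style.
import numpy as np

def gemv_reference(alpha, A, x, beta, y, incx=1, incy=1):
    # A negative increment means the vector is traversed backwards, which is
    # why the C template first moves the pointer to the last element.
    xv = x if incx == 1 else x[::-1]
    yv = y if incy == 1 else y[::-1]
    out = alpha * A @ xv + beta * yv
    return out if incy == 1 else out[::-1]

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 4))
x = rng.standard_normal(4)
y = rng.standard_normal(3)
expected = 2.0 * A @ x[::-1] + 0.5 * y
assert np.allclose(gemv_reference(2.0, A, x, 0.5, y, incx=-1), expected)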
*/ if (*INCX < 0) SX += (*N - 1) * (-*INCX); diff --git a/theano/tensor/c_code/dimshuffle.c b/aesara/tensor/c_code/dimshuffle.c similarity index 100% rename from theano/tensor/c_code/dimshuffle.c rename to aesara/tensor/c_code/dimshuffle.c diff --git a/theano/tensor/elemwise.py b/aesara/tensor/elemwise.py similarity index 97% rename from theano/tensor/elemwise.py rename to aesara/tensor/elemwise.py index f88e4a99ca..1d358500b5 100644 --- a/theano/tensor/elemwise.py +++ b/aesara/tensor/elemwise.py @@ -2,20 +2,20 @@ import numpy as np -import theano.tensor.basic -from theano.configdefaults import config -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply -from theano.graph.null_type import NullType -from theano.graph.op import COp, ExternalCOp, OpenMPOp -from theano.graph.params_type import ParamsType -from theano.graph.utils import MethodNotDefined -from theano.link.c.basic import failure_code -from theano.misc.frozendict import frozendict -from theano.misc.safe_asarray import _asarray -from theano.printing import FunctionPrinter, pprint -from theano.scalar import get_scalar_type -from theano.scalar.basic import ( +import aesara.tensor.basic +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply +from aesara.graph.null_type import NullType +from aesara.graph.op import COp, ExternalCOp, OpenMPOp +from aesara.graph.params_type import ParamsType +from aesara.graph.utils import MethodNotDefined +from aesara.link.c.basic import failure_code +from aesara.misc.frozendict import frozendict +from aesara.misc.safe_asarray import _asarray +from aesara.printing import FunctionPrinter, pprint +from aesara.scalar import get_scalar_type +from aesara.scalar.basic import ( AND, OR, XOR, @@ -25,18 +25,18 @@ ScalarMaximum, ScalarMinimum, ) -from theano.scalar.basic import bool as scalar_bool -from theano.scalar.basic import identity as scalar_identity -from theano.scalar.basic import scalar_maximum, scalar_minimum, transfer_type, upcast -from theano.tensor import elemwise_cgen as cgen -from theano.tensor.type import ( +from aesara.scalar.basic import bool as scalar_bool +from aesara.scalar.basic import identity as scalar_identity +from aesara.scalar.basic import scalar_maximum, scalar_minimum, transfer_type, upcast +from aesara.tensor import elemwise_cgen as cgen +from aesara.tensor.type import ( TensorType, continuous_dtypes, discrete_dtypes, float_dtypes, lvector, ) -from theano.utils import uniq +from aesara.utils import uniq _numpy_ver = [int(n) for n in np.__version__.split(".")[:2]] @@ -59,7 +59,7 @@ class DimShuffle(ExternalCOp): A list representing the relationship between the input's dimensions and the output's dimensions. Each element of the list can either be an index or 'x'. Indices must be encoded - as python integers, not theano symbolic integers. + as python integers, not aesara symbolic integers. inplace : bool, optional If True (default), the output will be a view of the input. 
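The ``new_order`` rules described above are what the user-facing ``dimshuffle`` method exposes. A small sketch (assuming the method keeps its Theano behaviour after the rename):

# Transpose a matrix and insert a broadcastable axis via the new_order list.
import numpy as np
import aesara
import aesara.tensor as tt

m = tt.matrix("m")
t = m.dimshuffle(1, "x", 0)  # 'x' inserts a broadcastable dimension
f = aesara.function([m], t)

val = np.zeros((2, 3), dtype=aesara.config.floatX)
assert f(val).shape == (3, 1, 2)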
@@ -212,7 +212,7 @@ def __setstate__(self, state): super().__init__([self.c_func_file], self.c_func_name) def make_node(self, _input): - input = theano.tensor.basic.as_tensor_variable(_input) + input = aesara.tensor.basic.as_tensor_variable(_input) ib = tuple(input.type.broadcastable) if not ib == self.input_broadcastable: if len(ib) != len(self.input_broadcastable): @@ -285,7 +285,7 @@ def R_op(self, inputs, eval_points): return self(*eval_points, **dict(return_list=True)) def grad(self, inp, grads): - from theano.tensor.basic import as_tensor_variable + from aesara.tensor.basic import as_tensor_variable (x,) = inp (gz,) = grads @@ -490,7 +490,7 @@ def make_node(self, *inputs): is left-completed to the greatest number of dimensions with 1s using DimShuffle. """ - inputs = list(map(theano.tensor.basic.as_tensor_variable, inputs)) + inputs = list(map(aesara.tensor.basic.as_tensor_variable, inputs)) out_dtypes, out_broadcastables, inputs = self.get_output_info( DimShuffle, *inputs ) @@ -519,7 +519,7 @@ def R_op(self, inputs, eval_points): # make such that _bgrads computes only the gradients of the # current output on the inputs ( and not all outputs) ograds = [x.zeros_like() for x in outs] - ograds[idx] = theano.tensor.basic.ones_like(out) + ograds[idx] = aesara.tensor.basic.ones_like(out) bgrads = self._bgrad(inputs, outs, ograds) rop_out = None @@ -553,7 +553,7 @@ def connection_pattern(self, node): return [[True for output in node.outputs] for ipt in node.inputs] def L_op(self, inputs, outs, ograds): - from theano.tensor.math import sum as tt_sum + from aesara.tensor.math import sum as tt_sum # Compute grad with respect to broadcasted input rval = self._bgrad(inputs, outs, ograds) @@ -642,7 +642,7 @@ def transform(r): # the gradient contains a constant, translate it as # an equivalent TensorType of size 1 and proper number of # dimensions - res = theano.tensor.basic.constant( + res = aesara.tensor.basic.constant( np.asarray(r.data), dtype=r.type.dtype ) return DimShuffle((), ["x"] * nd)(res) @@ -830,13 +830,13 @@ def perform(self, node, inputs, output_storage): odat[...] = variable storage[0] = odat # Sometimes NumPy return a Python type. - # Some Theano op return a different dtype like floor, ceil, + # Some Aesara op return a different dtype like floor, ceil, # trunc, eq, ... elif not isinstance(variable, np.ndarray) or variable.dtype != nout.dtype: variable = np.asarray(variable, nout.dtype) # The next line is needed for numpy 1.9. Otherwise # there are tests that fail in DebugMode. - # Normally we would call theano.misc._asarray, but it + # Normally we would call aesara.misc._asarray, but it # is faster to inline the code. We know that the dtype # are the same string, just different typenum. if np.dtype(nout.dtype).num != variable.dtype.num: @@ -1361,7 +1361,7 @@ def _output_dtype(self, input_dtype): return input_dtype def make_node(self, input): - from theano.tensor.basic import as_tensor_variable + from aesara.tensor.basic import as_tensor_variable input = as_tensor_variable(input) @@ -1505,7 +1505,7 @@ def _c_all(self, node, name, inames, onames, sub): if len(axis) == 0: # The acc_dtype is never a downcast compared to the input dtype # So we just need a cast to the output dtype. 
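To make the ``make_node`` and broadcasting machinery above concrete, here is a tiny sketch of building an ``Elemwise`` directly from a scalar Op; it assumes ``aesara.scalar.add`` is still the scalar addition instance, as in Theano. The scalar input is left-completed with a broadcastable dimension exactly as ``Elemwise.make_node`` describes.

# An element-wise, broadcasting tensor Op built from a scalar Op.
import aesara
import aesara.scalar as aes
import aesara.tensor as tt
from aesara.tensor.elemwise import Elemwise

add_elemwise = Elemwise(aes.add)
x = tt.dvector("x")
y = tt.dscalar("y")                    # broadcast against the vector
f = aesara.function([x, y], add_elemwise(x, y))
print(f([1.0, 2.0, 3.0], 10.0))        # -> [11. 12. 13.]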
- var = theano.tensor.basic.cast(input, node.outputs[0].dtype) + var = aesara.tensor.basic.cast(input, node.outputs[0].dtype) if var is input: var = Elemwise(scalar_identity)(input) assert var.dtype == node.outputs[0].dtype @@ -1849,7 +1849,7 @@ def make_node(self, input): # We need to redefine make_node so that, if self.dtype is None, # we can infer what dtype should be, and create a node from an Op # of the appropriate dtype. - input = theano.tensor.basic.as_tensor_variable(input) + input = aesara.tensor.basic.as_tensor_variable(input) dtype = self._output_dtype(input.dtype) acc_dtype = self._acc_dtype(input.dtype) @@ -1893,7 +1893,7 @@ def scalar_elemwise(*symbol, nfunc=None, nin=None, nout=None, symbolname=None): not take a NumPy array argument to put its result in. """ - import theano.scalar as scalar + import aesara.scalar as scalar def construct(symbol): nonlocal symbolname diff --git a/theano/tensor/elemwise_cgen.py b/aesara/tensor/elemwise_cgen.py similarity index 99% rename from theano/tensor/elemwise_cgen.py rename to aesara/tensor/elemwise_cgen.py index bea590e870..e8fb097459 100644 --- a/theano/tensor/elemwise_cgen.py +++ b/aesara/tensor/elemwise_cgen.py @@ -1,4 +1,4 @@ -from theano.configdefaults import config +from aesara.configdefaults import config def make_declare(loop_orders, dtypes, sub): @@ -122,8 +122,8 @@ def make_alloc(loop_orders, dtype, sub, fortran="0"): """ type = dtype.upper() - if type.startswith("THEANO_COMPLEX"): - type = type.replace("THEANO_COMPLEX", "NPY_COMPLEX") + if type.startswith("AESARA_COMPLEX"): + type = type.replace("AESARA_COMPLEX", "NPY_COMPLEX") nd = len(loop_orders[0]) init_dims = "" # For each dimension, the tensors are either all broadcasted, in diff --git a/theano/tensor/exceptions.py b/aesara/tensor/exceptions.py similarity index 100% rename from theano/tensor/exceptions.py rename to aesara/tensor/exceptions.py diff --git a/theano/tensor/extra_ops.py b/aesara/tensor/extra_ops.py similarity index 96% rename from theano/tensor/extra_ops.py rename to aesara/tensor/extra_ops.py index 4ffaeb9ad8..11e7258693 100644 --- a/theano/tensor/extra_ops.py +++ b/aesara/tensor/extra_ops.py @@ -2,30 +2,30 @@ import numpy as np -import theano -from theano.assert_op import Assert -from theano.gradient import ( +import aesara +from aesara.assert_op import Assert +from aesara.gradient import ( DisconnectedType, _float_zeros_like, disconnected_type, grad_undefined, ) -from theano.graph.basic import Apply, equal_computations -from theano.graph.op import COp, Op -from theano.graph.params_type import ParamsType -from theano.graph.type import EnumList, Generic -from theano.misc.safe_asarray import _asarray -from theano.scalar import int32 as int_t -from theano.scalar import upcast -from theano.tensor import basic as tt -from theano.tensor import nlinalg -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import abs_ -from theano.tensor.math import all as tt_all -from theano.tensor.math import eq, ge, lt, maximum, minimum, or_, prod -from theano.tensor.math import sum as tt_sum -from theano.tensor.subtensor import advanced_inc_subtensor1, set_subtensor -from theano.tensor.type import ( +from aesara.graph.basic import Apply, equal_computations +from aesara.graph.op import COp, Op +from aesara.graph.params_type import ParamsType +from aesara.graph.type import EnumList, Generic +from aesara.misc.safe_asarray import _asarray +from aesara.scalar import int32 as int_t +from aesara.scalar import upcast +from aesara.tensor import basic as 
tt +from aesara.tensor import nlinalg +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import abs_ +from aesara.tensor.math import all as tt_all +from aesara.tensor.math import eq, ge, lt, maximum, minimum, or_, prod +from aesara.tensor.math import sum as tt_sum +from aesara.tensor.subtensor import advanced_inc_subtensor1, set_subtensor +from aesara.tensor.type import ( TensorType, dvector, int_dtypes, @@ -33,7 +33,7 @@ integer_dtypes, vector, ) -from theano.utils import LOCAL_BITWIDTH, PYTHON_INT_BITWIDTH +from aesara.utils import LOCAL_BITWIDTH, PYTHON_INT_BITWIDTH class CpuContiguous(COp): @@ -258,8 +258,8 @@ def searchsorted(x, v, side="left", sorter=None): Examples -------- - >>> from theano import tensor as tt - >>> from theano.tensor import extra_ops + >>> from aesara import tensor as tt + >>> from aesara.tensor import extra_ops >>> x = tt.dvector() >>> idx = x.searchsorted(3) >>> idx.eval({x: [1,2,3,4,5]}) @@ -449,7 +449,7 @@ def cumprod(x, axis=None): # CumsumOp and CumprodOp are for compatibility with old version, -# just in case unpickling a theano function with old Ops. +# just in case unpickling an Aesara function with old Ops. class CumsumOp(Op): __props__ = ("axis",) @@ -851,7 +851,7 @@ def make_node(self, M): if M.ndim != 0: raise TypeError(f"{self.__class__.__name__} only works on scalar input") elif M.dtype not in integer_dtypes: - # dtype is a theano attribute here + # dtype is an Aesara attribute here raise TypeError(f"{self.__class__.__name__} only works on integer input") return Apply(self, [M], [dvector()]) @@ -1035,7 +1035,7 @@ def make_node(self, a, val, offset): elif offset.dtype not in integer_dtypes: raise TypeError( f"{self.__class__.__name__}: type of third parameter must be as integer" - " use theano.tensor.cast( input, 'int32/int64')" + " use aesara.tensor.cast( input, 'int32/int64')" ) return Apply(self, [a, val, offset], [a.type()]) @@ -1181,15 +1181,15 @@ class Unique(Op): Examples -------- >>> import numpy as np - >>> import theano + >>> import aesara - >>> x = theano.tensor.vector() - >>> f = theano.function([x], Unique(True, True, False)(x)) + >>> x = aesara.tensor.vector() + >>> f = aesara.function([x], Unique(True, True, False)(x)) >>> f([1, 2., 3, 4, 3, 2, 1.]) [array([ 1., 2., 3., 4.]), array([0, 1, 2, 3]), array([0, 1, 2, 3, 2, 1, 0])] - >>> y = theano.tensor.matrix() - >>> g = theano.function([y], Unique(True, True, False)(y)) + >>> y = aesara.tensor.matrix() + >>> g = aesara.function([y], Unique(True, True, False)(y)) >>> g([[1, 1, 1.0], (2, 3, 3.0)]) [array([ 1., 2., 3.]), array([0, 3, 4]), array([0, 0, 0, 1, 2, 2])] @@ -1348,7 +1348,7 @@ def unravel_index(indices, dims, order="C"): Parameters ---------- - indices : Theano or NumPy array + indices : Aesara or NumPy array An integer array whose elements are indices into the flattened version of an array of dimensions ``dims``. dims : tuple of ints @@ -1418,7 +1418,7 @@ def ravel_multi_index(multi_index, dims, mode="raise", order="C"): Parameters ---------- - multi_index : tuple of Theano or NumPy arrays + multi_index : tuple of Aesara or NumPy arrays A tuple of integer arrays, one array for each dimension. dims : tuple of ints The shape of array into which the indices from ``multi_index`` apply. @@ -1437,7 +1437,7 @@ def ravel_multi_index(multi_index, dims, mode="raise", order="C"): Returns ------- - raveled_indices : Theano array + raveled_indices : Aesara array An array of indices into the flattened version of an array of dimensions ``dims``. 
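A short usage sketch for the index helpers documented above (not from the diff); it assumes ``ravel_multi_index`` stays importable from ``aesara.tensor.extra_ops`` after the rename and matches its NumPy counterpart.

# Convert (row, col) pairs into flat C-order indices and check against NumPy.
import numpy as np
import aesara
import aesara.tensor as tt
from aesara.tensor.extra_ops import ravel_multi_index

rows = tt.lvector("rows")
cols = tt.lvector("cols")
flat = ravel_multi_index((rows, cols), (3, 4))  # flat = row * 4 + col in C order

f = aesara.function([rows, cols], flat)
out = f([0, 1, 2], [3, 0, 1])
assert np.array_equal(out, np.ravel_multi_index(([0, 1, 2], [3, 0, 1]), (3, 4)))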
@@ -1485,7 +1485,7 @@ def broadcast_shape_iter(arrays, **kwargs): are (scalar) constants with the value `1` or `1` exactly. """ - one = theano.scalar.ScalarConstant(theano.scalar.int64, 1) + one = aesara.scalar.ScalarConstant(aesara.scalar.int64, 1) arrays_are_shapes = kwargs.pop("arrays_are_shapes", False) if arrays_are_shapes: diff --git a/theano/tensor/fft.py b/aesara/tensor/fft.py similarity index 95% rename from theano/tensor/fft.py rename to aesara/tensor/fft.py index fe5853f3b4..8da2467204 100644 --- a/theano/tensor/fft.py +++ b/aesara/tensor/fft.py @@ -1,12 +1,12 @@ import numpy as np -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor.basic import as_tensor_variable -from theano.tensor.math import sqrt -from theano.tensor.subtensor import set_subtensor -from theano.tensor.type import TensorType, integer_dtypes +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.math import sqrt +from aesara.tensor.subtensor import set_subtensor +from aesara.tensor.type import TensorType, integer_dtypes class RFFTOp(Op): diff --git a/theano/tensor/fourier.py b/aesara/tensor/fourier.py similarity index 90% rename from theano/tensor/fourier.py rename to aesara/tensor/fourier.py index 27bd382566..e7473b88a5 100644 --- a/theano/tensor/fourier.py +++ b/aesara/tensor/fourier.py @@ -2,9 +2,9 @@ import numpy as np -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor.basic import ( +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor.basic import ( arange, as_tensor_variable, concatenate, @@ -12,16 +12,16 @@ stack, switch, ) -from theano.tensor.math import exp, lt, outer, tensordot -from theano.tensor.shape import shape -from theano.tensor.subtensor import set_subtensor -from theano.tensor.type import TensorType, integer_dtypes -from theano.tensor.var import TensorConstant +from aesara.tensor.math import exp, lt, outer, tensordot +from aesara.tensor.shape import shape +from aesara.tensor.subtensor import set_subtensor +from aesara.tensor.type import TensorType, integer_dtypes +from aesara.tensor.var import TensorConstant class Fourier(Op): """ - WARNING: for officially supported FFTs, use theano.tensor.fft, which + WARNING: for officially supported FFTs, use aesara.tensor.fft, which provides real-input FFTs. Gradients are supported, as well as optimization transfers to GPU ops. 
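Following the warning above, a minimal use of the supported real-input FFT module. This sketch assumes ``aesara.tensor.fft.rfft`` keeps Theano's convention of batching over the leading axis and returning real and imaginary parts stacked in a trailing axis of length 2.

# Real-input FFT over the last axis of a batch of signals.
import numpy as np
import aesara
import aesara.tensor as tt
from aesara.tensor import fft

x = tt.matrix("x", dtype="float64")      # one real signal per row
spec = fft.rfft(x, norm="ortho")
f = aesara.function([x], spec)

signal = np.random.rand(4, 16)
assert f(signal).shape == (4, 16 // 2 + 1, 2)  # (batch, frequencies, real/imag)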
@@ -141,8 +141,8 @@ def grad(self, inputs, cost_grad): grad = cost_grad[0] if not isinstance(axis, TensorConstant): raise NotImplementedError( - "%s: gradient is currently implemented" - " only for axis being a Theano constant" % self.__class__.__name__ + f"{self.__class__.__name__}: gradient is currently implemented" + " only for axis being an Aesara constant" ) axis = int(axis.data) # notice that the number of actual elements in wrto is independent of @@ -154,7 +154,7 @@ def grad(self, inputs, cost_grad): pow_outer = exp(((-2 * math.pi * 1j) * outer_res) / (1.0 * n)) res = tensordot(grad, pow_outer, (axis, 0)) - # This would be simpler but not implemented by theano: + # This would be simpler but not implemented by aesara: # res = switch(lt(n, shape(a)[axis]), # set_subtensor(res[...,n::], 0, False, False), res) diff --git a/theano/tensor/inplace.py b/aesara/tensor/inplace.py similarity index 98% rename from theano/tensor/inplace.py rename to aesara/tensor/inplace.py index 292ae968a9..f3c1d741f1 100644 --- a/theano/tensor/inplace.py +++ b/aesara/tensor/inplace.py @@ -1,6 +1,6 @@ -from theano import printing -from theano.printing import pprint -from theano.tensor.elemwise import DimShuffle, scalar_elemwise +from aesara import printing +from aesara.printing import pprint +from aesara.tensor.elemwise import DimShuffle, scalar_elemwise @scalar_elemwise diff --git a/theano/tensor/io.py b/aesara/tensor/io.py similarity index 96% rename from theano/tensor/io.py rename to aesara/tensor/io.py index 81be511a20..e848c11a71 100644 --- a/theano/tensor/io.py +++ b/aesara/tensor/io.py @@ -1,10 +1,10 @@ import numpy as np -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import Op -from theano.graph.sched import key_to_cmp -from theano.graph.type import Generic -from theano.tensor.type import tensor +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import Op +from aesara.graph.sched import key_to_cmp +from aesara.graph.type import Generic +from aesara.tensor.type import tensor class LoadFromDisk(Op): @@ -79,11 +79,11 @@ def load(path, dtype, broadcastable, mmap_mode=None): will be mapped into virtual memory, so only the parts that are needed will be actually read from disk and put into memory. Other modes supported by numpy.load ('r', 'r+', 'w+') cannot - be supported by Theano. + be supported by Aesara. 
Examples -------- - >>> from theano import * + >>> from aesara import * >>> path = Variable(Generic()) >>> x = tensor.load(path, 'int64', (False,)) >>> y = x*2 diff --git a/theano/tensor/math.py b/aesara/tensor/math.py similarity index 98% rename from theano/tensor/math.py rename to aesara/tensor/math.py index ac60fa218b..175ea1930c 100644 --- a/theano/tensor/math.py +++ b/aesara/tensor/math.py @@ -2,17 +2,17 @@ import numpy as np -from theano import config, printing -from theano import scalar as ts -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable -from theano.graph.op import COp, Op -from theano.graph.params_type import ParamsType -from theano.graph.type import Generic -from theano.misc.safe_asarray import _asarray -from theano.printing import pprint -from theano.scalar.basic import BinaryScalarOp -from theano.tensor.basic import ( +from aesara import config, printing +from aesara import scalar as ts +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import COp, Op +from aesara.graph.params_type import ParamsType +from aesara.graph.type import Generic +from aesara.misc.safe_asarray import _asarray +from aesara.printing import pprint +from aesara.scalar.basic import BinaryScalarOp +from aesara.tensor.basic import ( alloc, arange, as_tensor_variable, @@ -23,15 +23,15 @@ stack, switch, ) -from theano.tensor.elemwise import ( +from aesara.tensor.elemwise import ( CAReduce, CAReduceDtype, DimShuffle, Elemwise, scalar_elemwise, ) -from theano.tensor.shape import shape -from theano.tensor.type import ( +from aesara.tensor.shape import shape +from aesara.tensor.type import ( complex_dtypes, continuous_dtypes, discrete_dtypes, @@ -40,16 +40,16 @@ tensor, uint_dtypes, ) -from theano.tensor.type_other import NoneConst -from theano.tensor.utils import as_list -from theano.tensor.var import TensorConstant, _tensor_py_operators +from aesara.tensor.type_other import NoneConst +from aesara.tensor.utils import as_list +from aesara.tensor.var import TensorConstant, _tensor_py_operators if int(config.tensor__cmp_sloppy) > 1: # This config variable is a quick-and-dirty way to get low-precision # comparisons. For a more precise setting of these tolerances set # them explicitly in your user code by assigning, for example, - # "theano.tensor.math.float32_atol = ..." + # "aesara.tensor.math.float32_atol = ..." # When config.tensor__cmp_sloppy>1 we are even more sloppy. 
This is # useful to test the GPU as they don't use extended precision and @@ -890,35 +890,35 @@ def isclose(a, b, rtol=1.0e-5, atol=1.0e-8, equal_nan=False): Examples -------- - >>> import theano + >>> import aesara >>> import numpy as np >>> a = _asarray([1e10, 1e-7], dtype="float64") >>> b = _asarray([1.00001e10, 1e-8], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([1, 0], dtype=int8) >>> a = _asarray([1e10, 1e-8], dtype="float64") >>> b = _asarray([1.00001e10, 1e-9], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([1, 1], dtype=int8) >>> a = _asarray([1e10, 1e-8], dtype="float64") >>> b = _asarray([1.0001e10, 1e-9], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([0, 1], dtype=int8) >>> a = _asarray([1.0, np.nan], dtype="float64") >>> b = _asarray([1.0, np.nan], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([1, 0], dtype==int8) >>> a = _asarray([1.0, np.nan], dtype="float64") >>> b = _asarray([1.0, np.nan], dtype="float64") - >>> theano.tensor.isclose(a, b, equal_nan=True).eval() + >>> aesara.tensor.isclose(a, b, equal_nan=True).eval() array([1, 1], dtype==int8) >>> a = _asarray([1.0, np.inf], dtype="float64") >>> b = _asarray([1.0, -np.inf], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([1, 0], dtype==int8) >>> a = _asarray([1.0, np.inf], dtype="float64") >>> b = _asarray([1.0, np.inf], dtype="float64") - >>> theano.tensor.isclose(a, b).eval() + >>> aesara.tensor.isclose(a, b).eval() array([1, 1], dtype==int8) """ @@ -1093,9 +1093,9 @@ def round(a, mode=None): mode = "half_to_even" if config.warn__round: warnings.warn( - "theano.tensor.round() changed its default from" + "aesara.tensor.round() changed its default from" " `half_away_from_zero` to `half_to_even` to have" - " the same default as NumPy. Use the Theano flag" + " the same default as NumPy. Use the Aesara flag" " `warn__round=False` to disable this warning." ) if mode == "half_away_from_zero": @@ -1536,7 +1536,7 @@ def mean(input, axis=None, dtype=None, op=False, keepdims=False, acc_dtype=None) else: axis = [int(a) for a in axis] - # This sequential division will possibly be optimized by Theano: + # This sequential division will possibly be optimized by Aesara: for i in axis: s = true_div(s, shp[i]) @@ -1823,13 +1823,13 @@ def make_node(self, *inputs): raise TypeError( "Input 0 (0-indexed) must have ndim of " f"1 or 2, {int(inputs[0].ndim)} given. Consider calling " - "theano.tensor.dot instead." + "aesara.tensor.dot instead." ) if inputs[1].ndim not in (1, 2): raise TypeError( "Input 1 (0-indexed) must have ndim of " f"1 or 2, {int(inputs[1].ndim)} given. Consider calling " - "theano.tensor.dot instead." + "aesara.tensor.dot instead." ) i_broadcastables = [input.type.broadcastable for input in inputs] @@ -1983,15 +1983,15 @@ def dense_dot(a, b): sequence: 1. If either a or b is scalar, it returns the elementwise product - without calling the Theano Dot op. + without calling the Aesara Dot op. - 2. If either a or b has more than 2 dimensions, it calls Theano's + 2. If either a or b has more than 2 dimensions, it calls Aesara's tensordot function with appropriate axes. The tensordot function expresses high-dimensional dot products in terms of 2D matrix multiplications, so it may be possible to futherize optimize for performance. - 3. 
If both a and b have either 1 or 2 dimensions, it calls Theano's + 3. If both a and b have either 1 or 2 dimensions, it calls Aesara's Dot op on a and b. Notes @@ -2142,7 +2142,7 @@ def tensordot(a, b, axes=2): Compute a generalized dot product over provided axes. Given two tensors a and b, tensordot computes a generalized dot product over - the provided axes. Theano's implementation reduces all expressions to + the provided axes. Aesara's implementation reduces all expressions to matrix or vector dot products and is based on code from Tijmen Tieleman's gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html). @@ -2186,7 +2186,7 @@ def tensordot(a, b, axes=2): Examples -------- It may be helpful to consider an example to see what tensordot does. - Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4) + Aesara's implementation is identical to NumPy's. Here a has shape (2, 3, 4) and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] -- note that a.shape[1] == b.shape[3] and a.shape[2] == b.shape[2]; these axes are compatible. The resulting tensor will have shape (2, 5, 6) -- the @@ -2653,7 +2653,7 @@ def __init__(self, axis=None, dtype=None, acc_dtype=None): super().__init__(mul_without_zeros, axis=axis, dtype=dtype, acc_dtype=acc_dtype) def grad(self, inp, grads): - from theano.gradient import grad_not_implemented + from aesara.gradient import grad_not_implemented (a,) = inp a_grad = grad_not_implemented( diff --git a/theano/tensor/math_opt.py b/aesara/tensor/math_opt.py similarity index 98% rename from theano/tensor/math_opt.py rename to aesara/tensor/math_opt.py index 5a7d5b6359..cc5e1b3fe1 100644 --- a/theano/tensor/math_opt.py +++ b/aesara/tensor/math_opt.py @@ -10,12 +10,12 @@ import numpy as np -import theano.scalar.basic as ts -from theano import compile -from theano.assert_op import assert_op -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable -from theano.graph.opt import ( +import aesara.scalar.basic as ts +from aesara import compile +from aesara.assert_op import assert_op +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable +from aesara.graph.opt import ( LocalOptGroup, LocalOptimizer, PatternSub, @@ -23,8 +23,8 @@ in2out, local_optimizer, ) -from theano.misc.safe_asarray import _asarray -from theano.tensor.basic import ( +from aesara.misc.safe_asarray import _asarray +from aesara.tensor.basic import ( Alloc, Join, MakeVector, @@ -39,7 +39,7 @@ switch, zeros_like, ) -from theano.tensor.basic_opt import ( +from aesara.tensor.basic_opt import ( FusionOptimizer, _fill_chain, broadcast_like, @@ -54,9 +54,9 @@ register_useless, scalarconsts_rest, ) -from theano.tensor.elemwise import CAReduce, DimShuffle, Elemwise -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import ( +from aesara.tensor.elemwise import CAReduce, DimShuffle, Elemwise +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import ( All, Any, Dot, @@ -77,25 +77,25 @@ log1p, makeKeepDims, ) -from theano.tensor.math import max as tt_max -from theano.tensor.math import maximum, mul, neg -from theano.tensor.math import pow as tt_pow -from theano.tensor.math import prod, sgn, sqr, sqrt, sub -from theano.tensor.math import sum as tt_sum -from theano.tensor.math import true_div -from theano.tensor.shape import Shape, Shape_i -from theano.tensor.subtensor import Subtensor -from theano.tensor.type import ( +from aesara.tensor.math import max as 
tt_max +from aesara.tensor.math import maximum, mul, neg +from aesara.tensor.math import pow as tt_pow +from aesara.tensor.math import prod, sgn, sqr, sqrt, sub +from aesara.tensor.math import sum as tt_sum +from aesara.tensor.math import true_div +from aesara.tensor.shape import Shape, Shape_i +from aesara.tensor.subtensor import Subtensor +from aesara.tensor.type import ( uint_dtypes, values_eq_approx_remove_inf, values_eq_approx_remove_inf_nan, values_eq_approx_remove_nan, ) -from theano.tensor.var import TensorConstant -from theano.utils import NoDuplicateOptWarningFilter +from aesara.tensor.var import TensorConstant +from aesara.utils import NoDuplicateOptWarningFilter -_logger = logging.getLogger("theano.tensor.math_opt") +_logger = logging.getLogger("aesara.tensor.math_opt") _logger.addFilter(NoDuplicateOptWarningFilter()) @@ -138,7 +138,7 @@ def local_0_dot_x(fgraph, node): else: _logger.warning( "Optimization Warning: " - "Optimization theano/opt.py:local_0_dot_x Found " + "Optimization aesara/opt.py:local_0_dot_x Found " "that it could apply, but was not implemented " "for dot product with these input types:\n" f"({x.type}, {y.type})" @@ -492,8 +492,8 @@ class AlgebraicCanonizer(LocalOptimizer): Examples -------- - >>> import theano.tensor as tt - >>> from theano.tensor.math_opt import AlgebraicCanonizer + >>> import aesara.tensor as tt + >>> from aesara.tensor.math_opt import AlgebraicCanonizer >>> add_canonizer = AlgebraicCanonizer(add, sub, neg, \\ ... lambda n, d: sum(n) - sum(d)) >>> mul_canonizer = AlgebraicCanonizer(mul, true_div, inv, \\ @@ -910,7 +910,7 @@ def same(x, y): if new.type == out.type: # This happen with test - # theano/tensor/tests/test_opt.py:T_local_switch_sink + # aesara/tensor/tests/test_opt.py:T_local_switch_sink new.tag.values_eq_approx = values_eq_approx_remove_inf_nan # We need to implement the copy over of the stacktrace. @@ -1345,11 +1345,11 @@ def local_sum_prod_div_dimshuffle(fgraph, node): if compatible_dims: _logger.warning( "Your current code is fine, but" - " Theano versions between " + " Aesara versions between " "rev. 3bd9b789f5e8 (2010-06-16) and" " cfc6322e5ad4 (2010-08-03) would " "have given an incorrect result. " - "To disable this warning, set the Theano" + "To disable this warning, set the Aesara" " flag warn__sum_div_dimshuffle_bug to" " False." ) @@ -1400,12 +1400,12 @@ def local_sum_prod_div_dimshuffle(fgraph, node): ): _logger.warning( "Your current code is fine," - " but Theano versions between " + " but Aesara versions between " "rev. 3bd9b789f5e8 (2010-06-16) and" " cfc6322e5ad4 (2010-08-03) would " "have given an incorrect result. " "To disable this warning, set the" - " Theano flag " + " Aesara flag " "warn__sum_div_dimshuffle_bug" " to False." ) @@ -1509,7 +1509,7 @@ def local_op_of_op(fgraph, node): and len(newaxis) == len(newaxis_old) ): _logger.warning( - "(YOUR CURRENT CODE IS FINE): Theano " + "(YOUR CURRENT CODE IS FINE): Aesara " "versions between version 9923a40c7b7a and August " "2nd, 2010 generated bugged code in this case. " "This happens when there are two consecutive sums " @@ -1518,7 +1518,7 @@ def local_op_of_op(fgraph, node): "removed some bad code, but not in all cases. You " "are in one such case. To disable this warning " "(that you can safely ignore since this bug has " - "been fixed) set the theano flag " + "been fixed) set the aesara flag " "`warn__sum_sum_bug` to False." 
) @@ -1595,10 +1595,10 @@ def local_reduce_join(fgraph, node): if len(reduce_axis) != 1 or 0 not in reduce_axis: if config.warn__reduce_join: warnings.warning( - "Your current code is fine, but Theano versions " + "Your current code is fine, but Aesara versions " "prior to 0.7 (or this development version Sept 2014) " "might have given an incorrect result for this code. " - "To disable this warning, set the Theano flag " + "To disable this warning, set the Aesara flag " "warn__reduce_join to False. The problem was an " "optimization, that modified the pattern " '"Reduce{scalar.op}(Join(axis=0, a, b), axis=0)", ' diff --git a/theano/tensor/nlinalg.py b/aesara/tensor/nlinalg.py similarity index 97% rename from theano/tensor/nlinalg.py rename to aesara/tensor/nlinalg.py index 9dfd4384cf..e5cc9893e9 100644 --- a/theano/tensor/nlinalg.py +++ b/aesara/tensor/nlinalg.py @@ -4,14 +4,14 @@ import numpy as np -from theano import scalar as ts -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor import basic as tt -from theano.tensor import math as tm -from theano.tensor.basic import ExtractDiag, as_tensor_variable -from theano.tensor.type import dvector, lscalar, matrix, scalar, vector +from aesara import scalar as ts +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor import basic as tt +from aesara.tensor import math as tm +from aesara.tensor.basic import ExtractDiag, as_tensor_variable +from aesara.tensor.type import dvector, lscalar, matrix, scalar, vector logger = logging.getLogger(__name__) @@ -177,8 +177,8 @@ class AllocDiag(Op): def make_node(self, _x): warnings.warn( - "DeprecationWarning: theano.tensor.nlinalg.AllocDiag" - "is deprecated, please use theano.tensor.basic.AllocDiag" + "DeprecationWarning: aesara.tensor.nlinalg.AllocDiag" + "is deprecated, please use aesara.tensor.basic.AllocDiag" "instead.", category=DeprecationWarning, ) @@ -747,7 +747,7 @@ def norm(x, ord): class TensorInv(Op): """ Class wrapper for tensorinv() function; - Theano utilization of numpy.linalg.tensorinv; + Aesara utilization of numpy.linalg.tensorinv; """ _numop = staticmethod(np.linalg.tensorinv) @@ -774,7 +774,7 @@ def infer_shape(self, fgraph, node, shapes): def tensorinv(a, ind=2): """ Does not run on GPU; - Theano utilization of numpy.linalg.tensorinv; + Aesara utilization of numpy.linalg.tensorinv; Compute the 'inverse' of an N-dimensional array. The result is an inverse for `a` relative to the tensordot operation @@ -806,7 +806,7 @@ def tensorinv(a, ind=2): class TensorSolve(Op): """ - Theano utilization of numpy.linalg.tensorsolve + Aesara utilization of numpy.linalg.tensorsolve Class wrapper for tensorsolve function. """ @@ -835,7 +835,7 @@ def perform(self, node, inputs, outputs): def tensorsolve(a, b, axes=None): """ - Theano utilization of numpy.linalg.tensorsolve. Does not run on GPU! + Aesara utilization of numpy.linalg.tensorsolve. Does not run on GPU! Solve the tensor equation ``a x = b`` for x. 
It is assumed that all indices of `x` are summed over in the product, diff --git a/theano/tensor/nnet/__init__.py b/aesara/tensor/nnet/__init__.py similarity index 84% rename from theano/tensor/nnet/__init__.py rename to aesara/tensor/nnet/__init__.py index 1f2fbbc843..dbffeefdee 100644 --- a/theano/tensor/nnet/__init__.py +++ b/aesara/tensor/nnet/__init__.py @@ -1,7 +1,7 @@ import warnings -import theano.tensor.nnet.opt -from theano.tensor.nnet.abstract_conv import ( +import aesara.tensor.nnet.opt +from aesara.tensor.nnet.abstract_conv import ( abstract_conv2d, conv2d, conv2d_grad_wrt_inputs, @@ -9,7 +9,7 @@ conv3d, separable_conv2d, ) -from theano.tensor.nnet.basic import ( +from aesara.tensor.nnet.basic import ( binary_crossentropy, categorical_crossentropy, confusion_matrix, @@ -42,8 +42,8 @@ softmax_with_bias, softsign, ) -from theano.tensor.nnet.batchnorm import batch_normalization -from theano.tensor.nnet.sigm import ( +from aesara.tensor.nnet.batchnorm import batch_normalization +from aesara.tensor.nnet.sigm import ( hard_sigmoid, scalar_sigmoid, sigmoid, diff --git a/theano/tensor/nnet/abstract_conv.py b/aesara/tensor/nnet/abstract_conv.py similarity index 98% rename from theano/tensor/nnet/abstract_conv.py rename to aesara/tensor/nnet/abstract_conv.py index fd8edd1e12..913161fcb4 100644 --- a/theano/tensor/nnet/abstract_conv.py +++ b/aesara/tensor/nnet/abstract_conv.py @@ -16,18 +16,18 @@ import numpy as np -import theano -from theano.assert_op import Assert -from theano.configdefaults import config -from theano.graph.basic import Apply, Variable -from theano.graph.op import Op -from theano.tensor.basic import ( +import aesara +from aesara.assert_op import Assert +from aesara.configdefaults import config +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import Op +from aesara.tensor.basic import ( as_tensor_variable, get_scalar_constant_value, patternbroadcast, ) -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.var import TensorConstant, TensorVariable +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.var import TensorConstant, TensorVariable try: @@ -40,7 +40,7 @@ __docformat__ = "restructuredtext en" -_logger = logging.getLogger("theano.tensor.nnet.abstract_conv") +_logger = logging.getLogger("aesara.tensor.nnet.abstract_conv") def get_conv_output_shape( @@ -560,12 +560,12 @@ def assert_conv_shape(shape): assert_shp = Assert( f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." ) - out_shape.append(assert_shp(n, theano.tensor.ge(n, 0))) + out_shape.append(assert_shp(n, aesara.tensor.ge(n, 0))) else: assert_shp = Assert( f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." ) - out_shape.append(assert_shp(n, theano.tensor.gt(n, 0))) + out_shape.append(assert_shp(n, aesara.tensor.gt(n, 0))) return tuple(out_shape) @@ -597,7 +597,7 @@ def assert_shape(x, expected_shape, msg="Unexpected shape."): tests = [] for i in range(x.ndim): if expected_shape[i] is not None: - tests.append(theano.tensor.eq(shape[i], expected_shape[i])) + tests.append(aesara.tensor.eq(shape[i], expected_shape[i])) if tests: return Assert(msg)(x, *tests) else: @@ -682,7 +682,7 @@ def abstract_conv2d( stack of 2D inputs with a set of 2D filters. The implementation is modelled after Convolutional Neural Networks (CNN). - Refer to :func:`nnet.conv2d ` for a more detailed documentation. + Refer to :func:`nnet.conv2d ` for a more detailed documentation. 
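A minimal forward pass through the user-facing ``conv2d`` wrapper that this docstring refers to (illustration only; argument defaults are assumed to match Theano's).

# Valid-mode 2D convolution of a batch of single-channel 8x8 images.
import numpy as np
import aesara
import aesara.tensor as tt
from aesara.tensor.nnet import conv2d

images = tt.tensor4("images")    # (batch, channels, rows, cols)
filters = tt.tensor4("filters")  # (n_filters, channels, f_rows, f_cols)
out = conv2d(images, filters, border_mode="valid", filter_flip=True)

f = aesara.function([images, filters], out)
img = np.random.rand(2, 1, 8, 8).astype(aesara.config.floatX)
flt = np.random.rand(3, 1, 3, 3).astype(aesara.config.floatX)
assert f(img, flt).shape == (2, 3, 6, 6)  # valid mode: 8 - 3 + 1 = 6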
""" input = as_tensor_variable(input) @@ -1055,7 +1055,7 @@ def conv3d( GPU. Otherwise, it is the *Corr3dMM* convolution that will be used "caffe style convolution". - This is only supported in Theano 0.8 or the development + This is only supported in Aesara 0.8 or the development version until it is released. """ @@ -1192,7 +1192,7 @@ def conv2d_grad_wrt_inputs( GPU. Otherwise, it is the *CorrMM* convolution that will be used "caffe style convolution". - :note: This is only supported in Theano 0.8 or the development + :note: This is only supported in Aesara 0.8 or the development version until it is released. """ @@ -1355,7 +1355,7 @@ def conv3d_grad_wrt_inputs( GPU. Otherwise, it is the *Corr3dMM* convolution that will be used "caffe style convolution". - :note: This is only supported in Theano 0.8 or the development + :note: This is only supported in Aesara 0.8 or the development version until it is released. """ @@ -1508,7 +1508,7 @@ def conv2d_grad_wrt_weights( GPU. Otherwise, it is the *CorrMM* convolution that will be used "caffe style convolution". - :note: This is only supported in Theano 0.8 or the development + :note: This is only supported in Aesara 0.8 or the development version until it is released. """ @@ -1652,7 +1652,7 @@ def conv3d_grad_wrt_weights( GPU. Otherwise, it is the *Corr3dMM* convolution that will be used "caffe style convolution". - :note: This is only supported in Theano 0.8 or the development + :note: This is only supported in Aesara 0.8 or the development version until it is released. """ @@ -1811,7 +1811,7 @@ def bilinear_kernel_2D(ratio, normalize=True): Parameters ---------- - ratio: int or Constant/Scalar Theano tensor of int* dtype + ratio: int or Constant/Scalar Aesara tensor of int* dtype the ratio by which an image will be upsampled by the returned filter in the 2D space. @@ -1847,7 +1847,7 @@ def bilinear_kernel_1D(ratio, normalize=True): Parameters ---------- - ratio: int or Constant/Scalar Theano tensor of int* dtype + ratio: int or Constant/Scalar Aesara tensor of int* dtype the ratio by which an image will be upsampled by the returned filter in the 2D space. @@ -1863,7 +1863,7 @@ def bilinear_kernel_1D(ratio, normalize=True): """ - T = theano.tensor + T = aesara.tensor half_kern = T.arange(1, ratio + 1, dtype=config.floatX) kern = T.concatenate([half_kern, half_kern[-2::-1]]) @@ -1903,7 +1903,7 @@ def frac_bilinear_upsampling(input, frac_ratio): sides. This does not happen when it is odd. """ - T = theano.tensor + T = aesara.tensor row, col = input.shape[2:] up_input = input.reshape((-1, 1, row, col)) @@ -2048,7 +2048,7 @@ def bilinear_upsampling( return frac_bilinear_upsampling(input, frac_ratio=frac_ratio) # the remaining case if integer ratio with use_1D_kernel - T = theano.tensor + T = aesara.tensor try: up_bs = batch_size * num_input_channels except TypeError: @@ -2497,7 +2497,7 @@ def unshared2d(self, inp, kern, out_shape, direction="forward"): class AbstractConv(BaseAbstractConv): """Abstract Op for the forward convolution. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. """ @@ -2712,7 +2712,7 @@ def infer_shape(self, fgraph, node, input_shapes): class AbstractConv2d(AbstractConv): """Abstract Op for the forward convolution. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. """ @@ -2778,7 +2778,7 @@ def grad(self, inp, grads): class AbstractConv3d(AbstractConv): """Abstract Op for the forward convolution. 
- Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. """ @@ -2839,11 +2839,11 @@ def grad(self, inp, grads): class AbstractConv_gradWeights(BaseAbstractConv): """Gradient wrt. filters for `AbstractConv`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. :note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3067,11 +3067,11 @@ def infer_shape(self, fgraph, node, input_shapes): class AbstractConv2d_gradWeights(AbstractConv_gradWeights): """Gradient wrt. filters for `AbstractConv2d`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. :note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3132,17 +3132,17 @@ def grad(self, inp, grads): d_top = patternbroadcast(d_top, top.broadcastable) d_top = top.type.filter_variable(d_top) - d_height_width = (theano.gradient.DisconnectedType()(),) + d_height_width = (aesara.gradient.DisconnectedType()(),) return (d_bottom, d_top) + d_height_width class AbstractConv3d_gradWeights(AbstractConv_gradWeights): """Gradient wrt. filters for `AbstractConv3d`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. :note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3199,17 +3199,17 @@ def grad(self, inp, grads): d_top = patternbroadcast(d_top, top.broadcastable) d_top = top.type.filter_variable(d_top) - d_depth_height_width = (theano.gradient.DisconnectedType()(),) + d_depth_height_width = (aesara.gradient.DisconnectedType()(),) return (d_bottom, d_top) + d_depth_height_width class AbstractConv_gradInputs(BaseAbstractConv): """Gradient wrt. inputs for `AbstractConv`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. :note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3459,11 +3459,11 @@ def infer_shape(self, fgraph, node, input_shapes): class AbstractConv2d_gradInputs(AbstractConv_gradInputs): """Gradient wrt. inputs for `AbstractConv2d`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. :note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3524,17 +3524,17 @@ def grad(self, inp, grads): d_top = patternbroadcast(d_top, top.broadcastable) d_top = top.type.filter_variable(d_top) - d_height_width = (theano.gradient.DisconnectedType()(),) + d_height_width = (aesara.gradient.DisconnectedType()(),) return (d_weights, d_top) + d_height_width class AbstractConv3d_gradInputs(AbstractConv_gradInputs): """Gradient wrt. inputs for `AbstractConv3d`. - Refer to :func:`BaseAbstractConv ` + Refer to :func:`BaseAbstractConv ` for a more detailed documentation. 
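As the notes in these gradient classes say, they are normally created for you by automatic differentiation rather than used directly. A sketch of the intended usage (not part of the diff; it assumes the top-level ``aesara.grad`` alias survives the rename as ``theano.grad`` did):

# Differentiating a convolution lets Aesara insert the AbstractConv*_gradWeights
# and AbstractConv*_gradInputs nodes shown above on its own.
import aesara
import aesara.tensor as tt
from aesara.tensor.nnet import conv2d

images = tt.tensor4("images")
filters = tt.tensor4("filters")
cost = conv2d(images, filters).sum()
g_images, g_filters = aesara.grad(cost, [images, filters])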
:note: You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -3591,7 +3591,7 @@ def grad(self, inp, grads): d_top = patternbroadcast(d_top, top.broadcastable) d_top = top.type.filter_variable(d_top) - d_depth_height_width = (theano.gradient.DisconnectedType()(),) + d_depth_height_width = (aesara.gradient.DisconnectedType()(),) return (d_weights, d_top) + d_depth_height_width @@ -3703,7 +3703,7 @@ def conv2d( GPU. Otherwise, it is the *CorrMM* convolution that will be used "caffe style convolution". - This is only supported in Theano 0.8 or the development + This is only supported in Aesara 0.8 or the development version until it is released. The parameter filter_dilation is an implementation of `dilated @@ -3716,7 +3716,7 @@ def conv2d( "Keyword arguments 'imshp_logical' and 'kshp_logical' for conv2d " "are not supported anymore (and have not been a reliable way to " "perform upsampling). That feature is still available by calling " - "theano.tensor.nnet.conv.conv2d() for the time being." + "aesara.tensor.nnet.conv.conv2d() for the time being." ) if len(kwargs.keys()) > 0: warnings.warn( @@ -3789,7 +3789,7 @@ def conv2d_transpose( output_shape: tuple/list of len 4 of int or Constant variable The shape of the output of ``conv2d_transpose``. The last two elements - are allowed to be ``theano.tensor.type.scalar`` variables. + are allowed to be ``aesara.tensor.type.scalar`` variables. filter_shape: None, tuple/list of len 4 of int or Constant variable The shape of the filters parameter. diff --git a/theano/tensor/nnet/basic.py b/aesara/tensor/nnet/basic.py similarity index 97% rename from theano/tensor/nnet/basic.py rename to aesara/tensor/nnet/basic.py index 536cde841d..4dbf6810bf 100644 --- a/theano/tensor/nnet/basic.py +++ b/aesara/tensor/nnet/basic.py @@ -18,29 +18,29 @@ import numpy as np -import theano -from theano import scalar as ts -from theano.assert_op import Assert -from theano.compile import optdb -from theano.configdefaults import config -from theano.gradient import DisconnectedType, grad_not_implemented -from theano.graph.basic import Apply -from theano.graph.op import COp, Op -from theano.graph.opt import copy_stack_trace, local_optimizer, optimizer -from theano.scalar import UnaryScalarOp - -# Work-around for Python 3.6 issue that prevents `import theano.tensor as tt` -from theano.tensor import basic as tt -from theano.tensor import extra_ops, math_opt -from theano.tensor.basic import ARange, as_tensor_variable -from theano.tensor.basic_opt import ( +import aesara +from aesara import scalar as ts +from aesara.assert_op import Assert +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType, grad_not_implemented +from aesara.graph.basic import Apply +from aesara.graph.op import COp, Op +from aesara.graph.opt import copy_stack_trace, local_optimizer, optimizer +from aesara.scalar import UnaryScalarOp + +# Work-around for Python 3.6 issue that prevents `import aesara.tensor as tt` +from aesara.tensor import basic as tt +from aesara.tensor import extra_ops, math_opt +from aesara.tensor.basic import ARange, as_tensor_variable +from aesara.tensor.basic_opt import ( register_canonicalize, register_specialize, register_stabilize, ) -from theano.tensor.elemwise import DimShuffle, Elemwise -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math 
import ( +from aesara.tensor.elemwise import DimShuffle, Elemwise +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import ( MaxAndArgmax, Sum, add, @@ -54,13 +54,13 @@ neg, or_, ) -from theano.tensor.math import sum as tt_sum -from theano.tensor.math import tanh, tensordot, true_div -from theano.tensor.nnet.blocksparse import sparse_block_dot -from theano.tensor.nnet.sigm import sigmoid, softplus -from theano.tensor.shape import shape, shape_padleft -from theano.tensor.subtensor import AdvancedIncSubtensor, AdvancedSubtensor, Subtensor -from theano.tensor.type import ( +from aesara.tensor.math import sum as tt_sum +from aesara.tensor.math import tanh, tensordot, true_div +from aesara.tensor.nnet.blocksparse import sparse_block_dot +from aesara.tensor.nnet.sigm import sigmoid, softplus +from aesara.tensor.shape import shape, shape_padleft +from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedSubtensor, Subtensor +from aesara.tensor.type import ( TensorType, discrete_dtypes, float_dtypes, @@ -264,7 +264,7 @@ def c_code_template(dtype): # Get the vectorized version of exp if it exist try: - vec_exp = theano.scalar.exp.c_code_contiguous_raw( + vec_exp = aesara.scalar.exp.c_code_contiguous_raw( dtype, "Nx[1]", "sm_i", "sm_i" ) inside_row_loop_contig = ( @@ -303,7 +303,7 @@ def c_code_template(dtype): """ % locals() ) - except theano.graph.utils.MethodNotDefined: + except aesara.graph.utils.MethodNotDefined: pass end_row_loop = """ } @@ -589,7 +589,7 @@ def c_code_template(dtype): """ # Get the vectorized version of exp if it exist try: - vec_exp = theano.scalar.exp.c_code_contiguous_raw( + vec_exp = aesara.scalar.exp.c_code_contiguous_raw( dtype, "Nx[1]", "sm_i", "sm_i" ) inside_row_loop_contig = ( @@ -625,7 +625,7 @@ def c_code_template(dtype): """ % locals() ) - except theano.graph.utils.MethodNotDefined: + except aesara.graph.utils.MethodNotDefined: pass end_row_loop = """ @@ -1689,12 +1689,12 @@ def local_argmax_pushdown(fgraph, node): ) ): if config.warn__argmax_pushdown_bug: - logging.getLogger("theano.tensor.nnet.basic").warn( - "There was a bug in Theano fixed on May 27th, 2010 in this case." + logging.getLogger("aesara.tensor.nnet.basic").warn( + "There was a bug in Aesara fixed on May 27th, 2010 in this case." " I.E. when we take the max of a softplus, softmax, exp, " "log, tanh, sigmoid, softmax_with_bias op, we were doing " "the max of the parent of the input. To remove this " - "warning set the Theano flags 'warn__argmax_pushdown_bug' " + "warning set the Aesara flags 'warn__argmax_pushdown_bug' " "to False" ) @@ -2162,7 +2162,7 @@ def grad(inputs, out_grads): inp = [output, target] outp = softplus(-abs(output)) + output * ((output > 0) - target) - return theano.compile.builders.OpFromGraph( + return aesara.compile.builders.OpFromGraph( inp, [outp], grad_overrides=grad, @@ -2464,9 +2464,9 @@ def h_softmax( The following example builds a simple hierarchical softmax layer. >>> import numpy as np - >>> import theano - >>> import theano.tensor as tt - >>> from theano.tensor.nnet import h_softmax + >>> import aesara + >>> import aesara.tensor as tt + >>> from aesara.tensor.nnet import h_softmax >>> >>> # Parameters >>> batch_size = 32 @@ -2477,16 +2477,16 @@ def h_softmax( >>> output_size = n_outputs_per_class * n_outputs_per_class >>> >>> # First level of h_softmax - >>> floatX = theano.config.floatX - >>> W1 = theano.shared( + >>> floatX = aesara.config.floatX + >>> W1 = aesara.shared( ... 
np.random.normal(0, 0.001, (dim_x, n_classes)).astype(floatX)) - >>> b1 = theano.shared(np.zeros((n_classes,), floatX)) + >>> b1 = aesara.shared(np.zeros((n_classes,), floatX)) >>> >>> # Second level of h_softmax >>> W2 = np.random.normal(0, 0.001, ... size=(n_classes, dim_x, n_outputs_per_class)).astype(floatX) - >>> W2 = theano.shared(W2) - >>> b2 = theano.shared(np.zeros((n_classes, n_outputs_per_class), floatX)) + >>> W2 = aesara.shared(W2) + >>> b2 = aesara.shared(np.zeros((n_classes, n_outputs_per_class), floatX)) >>> >>> # We can now build the graph to compute a loss function, typically the >>> # negative log-likelihood: @@ -2668,13 +2668,13 @@ def confusion_matrix(actual, pred): Examples -------- - >>> import theano - >>> import theano.tensor as tt - >>> from theano.tensor.nnet import confusion_matrix + >>> import aesara + >>> import aesara.tensor as tt + >>> from aesara.tensor.nnet import confusion_matrix >>> x = tt.vector() >>> y = tt.vector() - >>> f = theano.function([x, y], confusion_matrix(x, y)) + >>> f = aesara.function([x, y], confusion_matrix(x, y)) >>> y_true = [2, 0, 2, 2, 0, 1] >>> y_pred = [0, 0, 2, 2, 0, 2] >>> print(f(y_true, y_pred)) diff --git a/theano/tensor/nnet/batchnorm.py b/aesara/tensor/nnet/batchnorm.py similarity index 95% rename from theano/tensor/nnet/batchnorm.py rename to aesara/tensor/nnet/batchnorm.py index 4812dd7df9..00b2fb304b 100644 --- a/theano/tensor/nnet/batchnorm.py +++ b/aesara/tensor/nnet/batchnorm.py @@ -1,18 +1,18 @@ import numpy as np -import theano -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.graph.opt import copy_stack_trace, local_optimizer -from theano.scalar import Composite, add, as_common_dtype, mul, sub, true_div -from theano.tensor import basic as tt -from theano.tensor.basic import as_tensor_variable -from theano.tensor.basic_opt import register_specialize_device -from theano.tensor.elemwise import Elemwise -from theano.tensor.math import inv, mean, prod, sqrt -from theano.tensor.math import sum as tt_sum -from theano.tensor.type import TensorType +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.graph.opt import copy_stack_trace, local_optimizer +from aesara.scalar import Composite, add, as_common_dtype, mul, sub, true_div +from aesara.tensor import basic as tt +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.basic_opt import register_specialize_device +from aesara.tensor.elemwise import Elemwise +from aesara.tensor.math import inv, mean, prod, sqrt +from aesara.tensor.math import sum as tt_sum +from aesara.tensor.type import TensorType class BNComposite(Composite): @@ -21,11 +21,11 @@ class BNComposite(Composite): @config.change_flags(compute_test_value="off") def __init__(self, dtype): self.dtype = dtype - x = theano.scalar.Scalar(dtype=dtype).make_variable() - mean = theano.scalar.Scalar(dtype=dtype).make_variable() - std = theano.scalar.Scalar(dtype=dtype).make_variable() - gamma = theano.scalar.Scalar(dtype=dtype).make_variable() - beta = theano.scalar.Scalar(dtype=dtype).make_variable() + x = aesara.scalar.Scalar(dtype=dtype).make_variable() + mean = aesara.scalar.Scalar(dtype=dtype).make_variable() + std = aesara.scalar.Scalar(dtype=dtype).make_variable() + gamma = aesara.scalar.Scalar(dtype=dtype).make_variable() + beta = aesara.scalar.Scalar(dtype=dtype).make_variable() o = add(mul(true_div(sub(x, mean), std), gamma), beta) 
inputs = [x, mean, std, gamma, beta] outputs = [o] @@ -483,12 +483,12 @@ def L_op(self, inputs, outputs, grads): dy = grads[0] _, x_mean, x_invstd = outputs[:3] disconnected_outputs = [ - theano.gradient.DisconnectedType()(), # epsilon - theano.gradient.DisconnectedType()(), + aesara.gradient.DisconnectedType()(), # epsilon + aesara.gradient.DisconnectedType()(), ] # running_average_factor # Optional running_mean and running_var. for i in range(5, len(inputs)): - disconnected_outputs.append(theano.gradient.DisconnectedType()()) + disconnected_outputs.append(aesara.gradient.DisconnectedType()()) return ( AbstractBatchNormTrainGrad(self.axes)( x, dy, scale, x_mean, x_invstd, epsilon @@ -626,7 +626,7 @@ def grad(self, inputs, grads): dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * ( scale / (two * est_var_eps * est_std) ) - return [dx, dscale, dbias, dmean, dvar, theano.gradient.DisconnectedType()()] + return [dx, dscale, dbias, dmean, dvar, aesara.gradient.DisconnectedType()()] def connection_pattern(self, node): # Specificy that epsilon is not connected to outputs. @@ -681,7 +681,7 @@ def grad(self, inp, grads): g_wrt_x_mean = 0 g_wrt_x_invstd = 0 - if not isinstance(ddinputs.type, theano.gradient.DisconnectedType): + if not isinstance(ddinputs.type, aesara.gradient.DisconnectedType): ccc = scale * (ddinputs - mean(ddinputs, axis=self.axes, keepdims=True)) ddd = (x_invstd ** 3) * ( ccc * mean(dy * x_diff, axis=self.axes, keepdims=True) @@ -712,7 +712,7 @@ def grad(self, inp, grads): keepdims=True, ) - if not isinstance(ddscale.type, theano.gradient.DisconnectedType): + if not isinstance(ddscale.type, aesara.gradient.DisconnectedType): g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy) g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff) g_wrt_x_mean = g_wrt_x_mean - ( @@ -722,7 +722,7 @@ def grad(self, inp, grads): ddscale * tt_sum(dy * x_diff, axis=self.axes, keepdims=True) ) - if not isinstance(ddbias.type, theano.gradient.DisconnectedType): + if not isinstance(ddbias.type, aesara.gradient.DisconnectedType): g_wrt_dy = g_wrt_dy + tt.fill(dy, ddbias) # depending on which output gradients are given, @@ -733,10 +733,10 @@ def grad(self, inp, grads): g_wrt_scale, g_wrt_x_mean, g_wrt_x_invstd, - theano.gradient.DisconnectedType()(), + aesara.gradient.DisconnectedType()(), ] return [ - theano.gradient.DisconnectedType()() if (type(r) == int and r == 0) else r + aesara.gradient.DisconnectedType()() if (type(r) == int and r == 0) else r for r in results ] @@ -827,7 +827,7 @@ def local_abstract_batch_norm_train(fgraph, node): for (r, r_orig) in zip(results, node.outputs) ] - for var in theano.graph.basic.vars_between(node.inputs, results): + for var in aesara.graph.basic.vars_between(node.inputs, results): if var not in node.inputs: copy_stack_trace(node.outputs[0], var) return results @@ -866,7 +866,7 @@ def local_abstract_batch_norm_train_grad(fgraph, node): for (r, r_orig) in zip(results, node.outputs) ] - for var in theano.graph.basic.vars_between(node.inputs, results): + for var in aesara.graph.basic.vars_between(node.inputs, results): if var not in node.inputs: copy_stack_trace(node.outputs[0], var) return results @@ -896,14 +896,14 @@ def local_abstract_batch_norm_inference(fgraph, node): result = (x - estimated_mean) * (scale / sqrt(estimated_variance + epsilon)) + bias result = tt.patternbroadcast(result, node.outputs[0].broadcastable) - for var in theano.graph.basic.vars_between(node.inputs, [result]): + for var in aesara.graph.basic.vars_between(node.inputs, [result]): if var not in 
node.inputs: copy_stack_trace(node.outputs[0], var) return [result] # Register Cpu Optmization -bn_groupopt = theano.graph.optdb.LocalGroupDB() +bn_groupopt = aesara.graph.optdb.LocalGroupDB() bn_groupopt.__name__ = "batchnorm_opts" register_specialize_device(bn_groupopt, "fast_compile", "fast_run") diff --git a/theano/tensor/nnet/blocksparse.py b/aesara/tensor/nnet/blocksparse.py similarity index 92% rename from theano/tensor/nnet/blocksparse.py rename to aesara/tensor/nnet/blocksparse.py index 2555d9323b..369b6f97b4 100644 --- a/theano/tensor/nnet/blocksparse.py +++ b/aesara/tensor/nnet/blocksparse.py @@ -1,10 +1,10 @@ import numpy as np -import theano -from theano.gradient import grad_undefined -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor.type import discrete_dtypes +import aesara +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor.type import discrete_dtypes class SparseBlockGemv(Op): @@ -73,11 +73,11 @@ def make_node(self, o, W, h, inputIdx, outputIdx): Which blocks will be computed is specified in `outputIdx`. """ - o = theano.tensor.as_tensor_variable(o) - W = theano.tensor.as_tensor_variable(W) - h = theano.tensor.as_tensor_variable(h) - inputIdx = theano.tensor.as_tensor_variable(inputIdx) - outputIdx = theano.tensor.as_tensor_variable(outputIdx) + o = aesara.tensor.as_tensor_variable(o) + W = aesara.tensor.as_tensor_variable(W) + h = aesara.tensor.as_tensor_variable(h) + inputIdx = aesara.tensor.as_tensor_variable(inputIdx) + outputIdx = aesara.tensor.as_tensor_variable(outputIdx) if o.ndim != 3: raise TypeError("The output o must be a 2D tensor") @@ -190,10 +190,10 @@ def make_node(self, o, x, y, xIdx, yIdx, alpha=None): Which blocks will be computed is specified in `yIdx`. """ - one = theano.tensor.constant(np.asarray(1.0, dtype="float32")) - o = theano.tensor.as_tensor_variable(o) - x = theano.tensor.as_tensor_variable(x) - y = theano.tensor.as_tensor_variable(y) + one = aesara.tensor.constant(np.asarray(1.0, dtype="float32")) + o = aesara.tensor.as_tensor_variable(o) + x = aesara.tensor.as_tensor_variable(x) + y = aesara.tensor.as_tensor_variable(y) if alpha is None: alpha = one diff --git a/theano/tensor/nnet/c_code/corr3d_gemm.c b/aesara/tensor/nnet/c_code/corr3d_gemm.c similarity index 99% rename from theano/tensor/nnet/c_code/corr3d_gemm.c rename to aesara/tensor/nnet/c_code/corr3d_gemm.c index 3887ffd8b9..548640d6b2 100644 --- a/theano/tensor/nnet/c_code/corr3d_gemm.c +++ b/aesara/tensor/nnet/c_code/corr3d_gemm.c @@ -67,7 +67,7 @@ void im3d2col(const %(float_type)s* data_im, const int channels, } } -// Unlike the Caffe and Theano GPU verions, the data_im array is set to zero +// Unlike the Caffe and Aesara GPU verions, the data_im array is set to zero // before the col2im call rather than doing it here. So, the result is just // accumulated into data_im. 
void col2im3d(const %(float_type)s* data_col, const int channels, @@ -109,7 +109,7 @@ void col2im3d(const %(float_type)s* data_col, const int channels, } -// Theano op code +// Aesara op code // GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter // Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu @@ -237,7 +237,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, PyArrayObject* col = (PyArrayObject*)PyArray_ZEROS(3, col_dim, PyArray_TYPE(top), - 0); + 0); if (NULL == col) { PyErr_Format(PyExc_RuntimeError, "Corr3dMM failed to allocate working memory of" @@ -324,7 +324,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, weight_dim[0], weight_dim[1]); return NULL; } - + // valid convolution: im2col, then gemm // Iterate over batch int blas_threads_saved = %(blas_get_num_threads)s; @@ -367,7 +367,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, %(omp_flags)s for(int j = 0; j < weight_dim[1]; ++j){ for(int i = 0; i < max_threads; ++i){ - ((%(float_type)s*)PyArray_DATA(weight))[j] += + ((%(float_type)s*)PyArray_DATA(weight))[j] += *((%(float_type)s*)PyArray_DATA(local_weight) + i * weight_dim[1] + j); } @@ -419,4 +419,3 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, // in here output is just aliased to one of bottom, weights, or top. return output; } - diff --git a/theano/tensor/nnet/c_code/corr_gemm.c b/aesara/tensor/nnet/c_code/corr_gemm.c similarity index 98% rename from theano/tensor/nnet/c_code/corr_gemm.c rename to aesara/tensor/nnet/c_code/corr_gemm.c index 7f22f03daa..2a1f6996d1 100644 --- a/theano/tensor/nnet/c_code/corr_gemm.c +++ b/aesara/tensor/nnet/c_code/corr_gemm.c @@ -58,7 +58,7 @@ void im2col(const %(float_type)s* data_im, const int channels, } } -// Unlike the Caffe and Theano GPU verions, the data_im array is set to zero +// Unlike the Caffe and Aesara GPU verions, the data_im array is set to zero // before the col2im call rather than doing it here. So, the result is just // accumulated into data_im. 
void col2im(const %(float_type)s* data_col, const int channels, @@ -92,7 +92,7 @@ void col2im(const %(float_type)s* data_col, const int channels, } -// Theano op code +// Aesara op code // GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter // Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu @@ -234,7 +234,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, batchSize, nFilters, topHeight, topWidth); return NULL; } - } + } // Create temporary columns int max_threads = %(omp_get_max_threads)s; @@ -250,7 +250,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, PyArrayObject* col = (PyArrayObject*)PyArray_ZEROS(3, col_dim, PyArray_TYPE(top), - 0); + 0); if (NULL == col) { PyErr_Format(PyExc_RuntimeError, "CorrMM failed to allocate working memory of" @@ -330,7 +330,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, (%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_, (%(float_type)s*)PyArray_DATA(weight) + g * group_weight_stride, &K_, &zero, - (%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_); + (%(float_type)s*)PyArray_DATA(top) + n * batch_top_stride + g * group_top_stride, &N_); } } } @@ -339,7 +339,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, /* // Original caffe code for comparison - // Note that this code was translated from the Theano GPU code, + // Note that this code was translated from the Aesara GPU code, // not the Caffe CPU code. // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // Note that this is for grouped convolution; we can ignore groups here, @@ -374,7 +374,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, npy_intp weight_dim[2]; weight_dim[0] = (npy_intp)max_threads; if (unshared) - weight_dim[1] = (npy_intp)(M_ * N_ * K_ * numgroups); + weight_dim[1] = (npy_intp)(M_ * N_ * K_ * numgroups); else weight_dim[1] = (npy_intp)(M_ * K_ * numgroups); PyArrayObject* local_weight = (PyArrayObject*)PyArray_ZEROS(2, @@ -387,7 +387,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, weight_dim[0], weight_dim[1]); return NULL; } - + // valid convolution: im2col, then gemm // Iterate over batch int blas_threads_saved = %(blas_get_num_threads)s; @@ -431,7 +431,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, (%(float_type)s*)PyArray_DATA(col) + tid * col_stride + g * group_col_stride, &N_, (%(float_type)s*)PyArray_DATA(top) + g * group_top_stride + n * batch_top_stride, &N_, (n == 0) ? &zero : &one, - (%(float_type)s*)PyArray_DATA(local_weight) + g * group_weight_stride + + (%(float_type)s*)PyArray_DATA(local_weight) + g * group_weight_stride + tid * weight_dim[1], &K_); } } @@ -448,7 +448,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, %(omp_flags)s for(int j = 0; j < weight_dim[1]; ++j){ for(int i = 0; i < max_threads; ++i){ - ((%(float_type)s*)PyArray_DATA(weight))[j] += + ((%(float_type)s*)PyArray_DATA(weight))[j] += *((%(float_type)s*)PyArray_DATA(local_weight) + i * weight_dim[1] + j); } @@ -456,7 +456,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, Py_DECREF(local_weight); /* // Original caffe code for comparison - // Note that this code was translated from the Theano GPU code, + // Note that this code was translated from the Aesara GPU code, // not the Caffe CPU code. 
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu // Note that this is for grouped convolution; we can ignore groups @@ -529,7 +529,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, %(blas_set_num_threads)s(blas_threads_saved); /* // Original caffe code for comparison - // Note that this code was translated from the Theano GPU code, + // Note that this code was translated from the Aesara GPU code, // not the Caffe CPU code. // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu for (int n = 0; n < num_; ++n) { diff --git a/theano/tensor/nnet/c_code/ctc_wrapper.c b/aesara/tensor/nnet/c_code/ctc_wrapper.c similarity index 100% rename from theano/tensor/nnet/c_code/ctc_wrapper.c rename to aesara/tensor/nnet/c_code/ctc_wrapper.c diff --git a/theano/tensor/nnet/conv.py b/aesara/tensor/nnet/conv.py similarity index 99% rename from theano/tensor/nnet/conv.py rename to aesara/tensor/nnet/conv.py index 77b4285bfd..921f24bcfc 100644 --- a/theano/tensor/nnet/conv.py +++ b/aesara/tensor/nnet/conv.py @@ -14,18 +14,18 @@ import numpy as np -import theano -from theano.graph.basic import Apply -from theano.graph.op import OpenMPOp -from theano.tensor import blas -from theano.tensor.basic import ( +import aesara +from aesara.graph.basic import Apply +from aesara.graph.op import OpenMPOp +from aesara.tensor import blas +from aesara.tensor.basic import ( as_tensor_variable, get_scalar_constant_value, patternbroadcast, ) -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.nnet.abstract_conv import get_conv_output_shape, get_conv_shape_1axis -from theano.tensor.type import discrete_dtypes, tensor +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.nnet.abstract_conv import get_conv_output_shape, get_conv_shape_1axis +from aesara.tensor.type import discrete_dtypes, tensor try: @@ -39,7 +39,7 @@ imported_scipy_signal = False __docformat__ = "restructuredtext en" -_logger = logging.getLogger("theano.tensor.nnet.conv") +_logger = logging.getLogger("aesara.tensor.nnet.conv") def conv2d( @@ -108,8 +108,8 @@ def conv2d( """ warnings.warn( - "theano.tensor.nnet.conv.conv2d is deprecated." - " Use theano.tensor.nnet.conv2d instead." + "aesara.tensor.nnet.conv.conv2d is deprecated." + " Use aesara.tensor.nnet.conv2d instead." ) # accept Constant value for image_shape and filter_shape. @@ -315,7 +315,7 @@ class ConvOp(OpenMPOp): # the value of speed_unroll_batch_kern,speed_unroll_patch_noshape,speed_unroll_patch_shape # have bean calculated on maggie36 when their is only 1 session logged on and only this was running. - # It is an Intel(R) Xeon(R) CPU E5430 @ 2.66GHz. It is computer with theano/tensor/nnet/tests/speed_test_conv.py + # It is an Intel(R) Xeon(R) CPU E5430 @ 2.66GHz. It is computer with aesara/tensor/nnet/tests/speed_test_conv.py # and took 5 minutes to run. # TODO: we should compute this table for each computer/os as this can change. # I saw on one computer that the speed with the shape can be slower than without! 
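The corr_gemm.c / corr3d_gemm.c kernels above follow the scheme named in their comments: unroll image patches with im2col, then perform one GEMM per image. A NumPy sketch of that idea for a single-channel valid correlation is shown below; the helper name `im2col_valid` is invented for illustration, and the real kernels additionally handle batching, channels, groups, strides and unshared weights:

import numpy as np

def im2col_valid(img, kh, kw):
    # Collect every kh x kw patch of a 2D image into the rows of a matrix.
    out_h, out_w = img.shape[0] - kh + 1, img.shape[1] - kw + 1
    cols = np.empty((out_h * out_w, kh * kw), dtype=img.dtype)
    for i in range(out_h):
        for j in range(out_w):
            cols[i * out_w + j] = img[i:i + kh, j:j + kw].ravel()
    return cols, (out_h, out_w)

img = np.arange(25, dtype="float64").reshape(5, 5)
kern = np.full((3, 3), 1.0)
cols, out_shape = im2col_valid(img, 3, 3)
out = (cols @ kern.ravel()).reshape(out_shape)   # the GEMM step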
@@ -809,7 +809,7 @@ def perform(self, node, inp, out): img2d, filtersflipped = inp (z,) = out if not imported_scipy_signal: - raise theano.graph.utils.MethodNotDefined( + raise aesara.graph.utils.MethodNotDefined( "c_headers", type(self), self.__class__.__name__, @@ -1025,7 +1025,7 @@ def grad(self, inp, grads): if self.out_mode == "valid" and (self.dx, self.dy) != (1, 1): raise NotImplementedError( "ERROR: ConvOp.grad is now disabled for 'valid' convolutions with" - " stride != (1, 1); call theano.tensor.nnet.conv2d() instead." + " stride != (1, 1); call aesara.tensor.nnet.conv2d() instead." ) if self.dx not in (1, 2) or self.dy not in (1, 2): @@ -1185,7 +1185,7 @@ def c_libraries(self, **kwargs): def c_no_compile_args(self, **kwargs): # when the ksph==(1,1) gcc 4.3.0 segfault during the # compilation with -O3. This don't happen at -O2 - if theano.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): + if aesara.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): return ["-O3"] else: return [] @@ -1195,7 +1195,7 @@ def c_compile_args(self, **kwargs): if self.use_blas(): ret = blas.ldflags(libs=False, flags=True) - if theano.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): + if aesara.link.c.cmodule.gcc_version() in ["4.3.0"] and self.kshp == (1, 1): ret += ["-O2"] # Add the -fopenmp flags ret += super().c_compile_args(**kwargs) diff --git a/theano/tensor/nnet/conv3d2d.py b/aesara/tensor/nnet/conv3d2d.py similarity index 94% rename from theano/tensor/nnet/conv3d2d.py rename to aesara/tensor/nnet/conv3d2d.py index 193d01d1b0..6c2b756396 100644 --- a/theano/tensor/nnet/conv3d2d.py +++ b/aesara/tensor/nnet/conv3d2d.py @@ -1,9 +1,9 @@ -import theano -from theano import tensor as tt -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.graph.opt import TopoOptimizer, copy_stack_trace, local_optimizer +import aesara +from aesara import tensor as tt +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.graph.opt import TopoOptimizer, copy_stack_trace, local_optimizer def get_diagonal_subtensor_view(x, i0, i1): @@ -209,8 +209,8 @@ def conv3d( See Also -------- Someone made a script that shows how to swap the axes between - both 3d convolution implementations in Theano. See the last - `attachment `_ + both 3d convolution implementations in Aesara. 
See the last + `attachment `_ """ @@ -242,7 +242,7 @@ def conv3d( if filters_shape is None: conv2d_filter_shape = None - out_4d = theano.tensor.nnet.conv2d( + out_4d = aesara.tensor.nnet.conv2d( signals.reshape(_signals_shape_4d), filters.reshape(_filters_shape_4d), input_shape=conv2d_signal_shape, @@ -293,7 +293,7 @@ def conv3d( out_tmp_padded = tt.zeros( dtype=out_tmp.dtype, shape=(Ns, Ts + 2 * Tpad, Nf, Tf, Hout, Wout) ) - out_tmp_padded = theano.tensor.subtensor.set_subtensor( + out_tmp_padded = aesara.tensor.subtensor.set_subtensor( out_tmp_padded[:, Tpad : (Ts + Tpad), :, :, :, :], out_tmp ) out_5d = diagonal_subtensor(out_tmp_padded, 1, 3).sum(axis=3) @@ -315,7 +315,7 @@ def local_inplace_DiagonalSubtensor(fgraph, node): return False -theano.compile.optdb.register( +aesara.compile.optdb.register( "local_inplace_DiagonalSubtensor", TopoOptimizer( local_inplace_DiagonalSubtensor, failure_callback=TopoOptimizer.warn_inplace diff --git a/theano/tensor/nnet/corr.py b/aesara/tensor/nnet/corr.py similarity index 97% rename from theano/tensor/nnet/corr.py rename to aesara/tensor/nnet/corr.py index d5c1139293..3f1f71085d 100644 --- a/theano/tensor/nnet/corr.py +++ b/aesara/tensor/nnet/corr.py @@ -1,18 +1,18 @@ import logging import os -import theano -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.op import OpenMPOp, _NoPythonOp -from theano.graph.params_type import ParamsType -from theano.graph.type import EnumList -from theano.scalar import int8, int64 -from theano.tensor import blas_headers -from theano.tensor.basic import as_tensor_variable -from theano.tensor.blas import blas_header_version, ldflags -from theano.tensor.nnet.abstract_conv import get_conv_output_shape -from theano.tensor.type import TensorType +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.op import OpenMPOp, _NoPythonOp +from aesara.graph.params_type import ParamsType +from aesara.graph.type import EnumList +from aesara.scalar import int8, int64 +from aesara.tensor import blas_headers +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.blas import blas_header_version, ldflags +from aesara.tensor.nnet.abstract_conv import get_conv_output_shape +from aesara.tensor.type import TensorType _logger = logging.getLogger(__name__) @@ -127,7 +127,7 @@ def __init__( self.unshared = unshared if not config.blas__ldflags: - # Theano will use a NumPy C implementation of [sd]gemm_ instead. + # Aesara will use a NumPy C implementation of [sd]gemm_ instead. self.blas_type = "" else: if "openblas" in config.blas__ldflags: @@ -188,7 +188,7 @@ def as_common_dtype(in1, in2): """ Upcast input variables if necessary. """ - dtype = theano.scalar.upcast(in1.dtype, in2.dtype) + dtype = aesara.scalar.upcast(in1.dtype, in2.dtype) return in1.astype(dtype), in2.astype(dtype) def __setstate__(self, d): @@ -739,7 +739,7 @@ class CorrMM_gradWeights(BaseCorrMM): Notes ----- You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. 
""" @@ -855,7 +855,7 @@ def grad(self, inp, grads): self.unshared, )(bottom, weights) d_height_width = ( - (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () + (aesara.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () ) return (d_bottom, d_top) + d_height_width @@ -873,7 +873,7 @@ class CorrMM_gradInputs(BaseCorrMM): Notes ----- You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -987,7 +987,7 @@ def grad(self, inp, grads): self.unshared, )(bottom, weights) d_height_width = ( - (theano.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () + (aesara.gradient.DisconnectedType()(),) * 2 if len(inp) == 4 else () ) return (d_weights, d_top) + d_height_width diff --git a/theano/tensor/nnet/corr3d.py b/aesara/tensor/nnet/corr3d.py similarity index 97% rename from theano/tensor/nnet/corr3d.py rename to aesara/tensor/nnet/corr3d.py index eb367ac53f..2a5e33b36e 100644 --- a/theano/tensor/nnet/corr3d.py +++ b/aesara/tensor/nnet/corr3d.py @@ -1,18 +1,18 @@ import logging import os -import theano -from theano.configdefaults import config -from theano.graph.basic import Apply -from theano.graph.op import OpenMPOp, _NoPythonOp -from theano.graph.params_type import ParamsType -from theano.graph.type import EnumList -from theano.scalar import int64 -from theano.tensor import blas_headers -from theano.tensor.basic import as_tensor_variable -from theano.tensor.blas import blas_header_version, ldflags -from theano.tensor.nnet.abstract_conv import get_conv_output_shape -from theano.tensor.type import TensorType +import aesara +from aesara.configdefaults import config +from aesara.graph.basic import Apply +from aesara.graph.op import OpenMPOp, _NoPythonOp +from aesara.graph.params_type import ParamsType +from aesara.graph.type import EnumList +from aesara.scalar import int64 +from aesara.tensor import blas_headers +from aesara.tensor.basic import as_tensor_variable +from aesara.tensor.blas import blas_header_version, ldflags +from aesara.tensor.nnet.abstract_conv import get_conv_output_shape +from aesara.tensor.type import TensorType _logger = logging.getLogger(__name__) @@ -113,7 +113,7 @@ def __init__( self.num_groups = num_groups if not config.blas__ldflags: - # Theano will use a NumPy C implementation of [sd]gemm_ instead. + # Aesara will use a NumPy C implementation of [sd]gemm_ instead. self.blas_type = "" else: if "openblas" in config.blas__ldflags: @@ -171,7 +171,7 @@ def as_common_dtype(in1, in2): """ Upcast input variables if necessary. """ - dtype = theano.scalar.upcast(in1.dtype, in2.dtype) + dtype = aesara.scalar.upcast(in1.dtype, in2.dtype) return in1.astype(dtype), in2.astype(dtype) def __setstate__(self, d): @@ -677,7 +677,7 @@ class Corr3dMMGradWeights(BaseCorr3dMM): Notes ----- You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. 
""" @@ -782,7 +782,7 @@ def grad(self, inp, grads): num_groups=self.num_groups, )(bottom, weights) d_height_width_depth = ( - (theano.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () + (aesara.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () ) return (d_bottom, d_top) + d_height_width_depth @@ -800,7 +800,7 @@ class Corr3dMMGradInputs(BaseCorr3dMM): Notes ----- You will not want to use this directly, but rely on - Theano's automatic differentiation or graph optimization to + Aesara's automatic differentiation or graph optimization to use it as needed. """ @@ -916,7 +916,7 @@ def grad(self, inp, grads): num_groups=self.num_groups, )(bottom, weights) d_height_width_depth = ( - (theano.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () + (aesara.gradient.DisconnectedType()(),) * 3 if len(inp) == 5 else () ) return (d_weights, d_top) + d_height_width_depth diff --git a/theano/tensor/nnet/ctc.py b/aesara/tensor/nnet/ctc.py similarity index 94% rename from theano/tensor/nnet/ctc.py rename to aesara/tensor/nnet/ctc.py index 41d33e8359..1b0179f04a 100644 --- a/theano/tensor/nnet/ctc.py +++ b/aesara/tensor/nnet/ctc.py @@ -1,17 +1,17 @@ import os import sys -import theano.tensor as tt -from theano.configdefaults import config -from theano.gradient import grad_undefined -from theano.graph.basic import Apply -from theano.graph.op import ExternalCOp, OpenMPOp -from theano.graph.opt import local_optimizer -from theano.link.c.cmodule import GCC_compiler -from theano.tensor.basic_opt import register_canonicalize -from theano.tensor.blas import batched_dot -from theano.tensor.extra_ops import cpu_contiguous -from theano.tensor.type import ftensor3, fvector +import aesara.tensor as tt +from aesara.configdefaults import config +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply +from aesara.graph.op import ExternalCOp, OpenMPOp +from aesara.graph.opt import local_optimizer +from aesara.link.c.cmodule import GCC_compiler +from aesara.tensor.basic_opt import register_canonicalize +from aesara.tensor.blas import batched_dot +from aesara.tensor.extra_ops import cpu_contiguous +from aesara.tensor.type import ftensor3, fvector def _ctc_find_lib(): diff --git a/theano/tensor/nnet/neighbours.py b/aesara/tensor/nnet/neighbours.py similarity index 95% rename from theano/tensor/nnet/neighbours.py rename to aesara/tensor/nnet/neighbours.py index c0c3b5169a..37e140c6b6 100644 --- a/theano/tensor/nnet/neighbours.py +++ b/aesara/tensor/nnet/neighbours.py @@ -4,15 +4,15 @@ """ import numpy as np -import theano -from theano.gradient import grad_not_implemented, grad_undefined -from theano.graph.basic import Apply -from theano.graph.op import COp -from theano.graph.type import EnumList -from theano.tensor.basic import arange, as_tensor_variable, concatenate, stack, zeros -from theano.tensor.math import ceil_intdiv -from theano.tensor.subtensor import inc_subtensor, set_subtensor -from theano.tensor.type import matrix +import aesara +from aesara.gradient import grad_not_implemented, grad_undefined +from aesara.graph.basic import Apply +from aesara.graph.op import COp +from aesara.graph.type import EnumList +from aesara.tensor.basic import arange, as_tensor_variable, concatenate, stack, zeros +from aesara.tensor.math import ceil_intdiv +from aesara.tensor.subtensor import inc_subtensor, set_subtensor +from aesara.tensor.type import matrix class Images2Neibs(COp): @@ -126,7 +126,7 @@ def grad(self, inp, grads): neib_shape is neib_step or neib_shape == neib_step or - 
# Theano Constant == do not compare the data + # Aesara Constant == do not compare the data # the equals function do that. (hasattr(neib_shape, "equals") and neib_shape.equals(neib_step)) ): @@ -169,7 +169,7 @@ def pos2map(pidx, pgz, prior_result, neib_shape, neib_step): indices = arange(neib_shape[0] * neib_shape[1]) pgzs = gz.dimshuffle((1, 0)) - result, _ = theano.scan( + result, _ = aesara.scan( fn=pos2map, sequences=[indices, pgzs], outputs_info=zeros(x.shape), @@ -196,7 +196,7 @@ def perform(self, node, inp, out_, params): (z,) = out_ # GpuImages2Neibs should not run this perform in DebugMode if type(self) != Images2Neibs: - raise theano.graph.utils.MethodNotDefined() + raise aesara.graph.utils.MethodNotDefined() def CEIL_INTDIV(a, b): if a % b: @@ -646,7 +646,7 @@ def c_code(self, node, name, inp, out, sub): def images2neibs(ten4, neib_shape, neib_step=None, mode="valid"): r""" - Function :func:`images2neibs ` + Function :func:`images2neibs ` allows to apply a sliding window operation to a tensor containing images or other two-dimensional objects. The sliding window operation loops over points in input data and stores @@ -722,11 +722,11 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode="valid"): .. code-block:: python # Defining variables - images = theano.tensor.type.tensor4('images') + images = aesara.tensor.type.tensor4('images') neibs = images2neibs(images, neib_shape=(5, 5)) - # Constructing theano function - window_function = theano.function([images], neibs) + # Constructing aesara function + window_function = aesara.function([images], neibs) # Input tensor (one image 10x10) im_val = np.arange(100.).reshape((1, 1, 10, 10)) @@ -743,29 +743,29 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode="valid"): def neibs2images(neibs, neib_shape, original_shape, mode="valid"): """ - Function :func:`neibs2images ` + Function :func:`neibs2images ` performs the inverse operation of - :func:`images2neibs `. It inputs - the output of :func:`images2neibs ` + :func:`images2neibs `. It inputs + the output of :func:`images2neibs ` and reconstructs its input. Parameters ---------- neibs : 2d tensor Like the one obtained by - :func:`images2neibs `. + :func:`images2neibs `. neib_shape `neib_shape` that was used in - :func:`images2neibs `. + :func:`images2neibs `. original_shape Original shape of the 4d tensor given to - :func:`images2neibs ` + :func:`images2neibs ` Returns ------- object Reconstructs the input of - :func:`images2neibs `, + :func:`images2neibs `, a 4d tensor of shape `original_shape`. Notes @@ -773,21 +773,21 @@ def neibs2images(neibs, neib_shape, original_shape, mode="valid"): Currently, the function doesn't support tensors created with `neib_step` different from default value. This means that it may be impossible to compute the gradient of a variable gained by - :func:`images2neibs ` w.r.t. + :func:`images2neibs ` w.r.t. its inputs in this case, because it uses - :func:`images2neibs ` for + :func:`images2neibs ` for gradient computation. Examples -------- Example, which uses a tensor gained in example for - :func:`images2neibs `: + :func:`images2neibs `: .. 
code-block:: python im_new = neibs2images(neibs, (5, 5), im_val.shape) - # Theano function definition - inv_window = theano.function([neibs], im_new) + # Aesara function definition + inv_window = aesara.function([neibs], im_new) # Function application im_new_val = inv_window(neibs_val) diff --git a/theano/tensor/nnet/opt.py b/aesara/tensor/nnet/opt.py similarity index 92% rename from theano/tensor/nnet/opt.py rename to aesara/tensor/nnet/opt.py index e3dd0e3636..2ca5b7d956 100644 --- a/theano/tensor/nnet/opt.py +++ b/aesara/tensor/nnet/opt.py @@ -2,19 +2,19 @@ Optimizations addressing the ops in nnet root directory """ -import theano -from theano import compile -from theano.compile import optdb -from theano.configdefaults import config -from theano.graph.opt import ( +import aesara +from aesara import compile +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.graph.opt import ( LocalMetaOptimizerSkipAssertionError, TopoOptimizer, copy_stack_trace, in2out, local_optimizer, ) -from theano.tensor.basic_opt import register_specialize_device -from theano.tensor.nnet.abstract_conv import ( +from aesara.tensor.basic_opt import register_specialize_device +from aesara.tensor.nnet.abstract_conv import ( AbstractConv2d, AbstractConv2d_gradInputs, AbstractConv2d_gradWeights, @@ -23,7 +23,7 @@ AbstractConv3d_gradWeights, get_conv_output_shape, ) -from theano.tensor.nnet.blocksparse import ( +from aesara.tensor.nnet.blocksparse import ( SparseBlockGemv, SparseBlockOuter, sparse_block_gemv_inplace, @@ -31,10 +31,10 @@ ) # Cpu implementation -from theano.tensor.nnet.conv import ConvOp, conv2d -from theano.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights -from theano.tensor.nnet.corr3d import Corr3dMM, Corr3dMMGradInputs, Corr3dMMGradWeights -from theano.tensor.type import TensorType +from aesara.tensor.nnet.conv import ConvOp, conv2d +from aesara.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights +from aesara.tensor.nnet.corr3d import Corr3dMM, Corr3dMMGradInputs, Corr3dMMGradWeights +from aesara.tensor.type import TensorType @local_optimizer([SparseBlockGemv], inplace=True) @@ -87,7 +87,7 @@ def local_inplace_sparse_block_outer(fgraph, node): # Conv opts @local_optimizer([AbstractConv2d]) def local_abstractconv_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -115,7 +115,7 @@ def local_abstractconv_gemm(fgraph, node): @local_optimizer([AbstractConv3d]) def local_abstractconv3d_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -141,7 +141,7 @@ def local_abstractconv3d_gemm(fgraph, node): @local_optimizer([AbstractConv2d_gradWeights]) def local_abstractconv_gradweight_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. 
if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -164,7 +164,7 @@ def local_abstractconv_gradweight_gemm(fgraph, node): if node.op.filter_flip: flip = (slice(None),) * (rval.ndim - 2) + (slice(None, None, -1),) * 2 rval = rval[flip] - rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) + rval = aesara.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], rval) return [rval] @@ -172,7 +172,7 @@ def local_abstractconv_gradweight_gemm(fgraph, node): @local_optimizer([AbstractConv3d_gradWeights]) def local_abstractconv3d_gradweight_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -193,7 +193,7 @@ def local_abstractconv3d_gradweight_gemm(fgraph, node): # need to flip the kernel if necessary if node.op.filter_flip: rval = rval[:, :, ::-1, ::-1, ::-1] - rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) + rval = aesara.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], rval) return [rval] @@ -201,7 +201,7 @@ def local_abstractconv3d_gradweight_gemm(fgraph, node): @local_optimizer([AbstractConv2d_gradInputs]) def local_abstractconv_gradinputs_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -231,7 +231,7 @@ def local_abstractconv_gradinputs_gemm(fgraph, node): @local_optimizer([AbstractConv3d_gradInputs]) def local_abstractconv3d_gradinputs_gemm(fgraph, node): - # If config.blas__ldflags is empty, Theano will use + # If config.blas__ldflags is empty, Aesara will use # a NumPy C implementation of [sd]gemm_. if config.cxx == "" or node.inputs[0].dtype == "float16": return @@ -394,7 +394,7 @@ def local_conv2d_gradweight_cpu(fgraph, node): res = res.dimshuffle((1, 0, 2, 3)) res = res[:, :, ::-1, ::-1] - res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable) + res = aesara.tensor.patternbroadcast(res, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], res) return [res] @@ -485,13 +485,13 @@ def local_conv2d_gradinputs_cpu(fgraph, node): ) din = din(topgrad, filters) copy_stack_trace(node.outputs[0], din) - din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable) + din = aesara.tensor.patternbroadcast(din, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], din) return [din] # Register Cpu Optmization -conv_groupopt = theano.graph.optdb.LocalGroupDB() +conv_groupopt = aesara.graph.optdb.LocalGroupDB() conv_groupopt.__name__ = "conv_opts" register_specialize_device(conv_groupopt, "fast_compile", "fast_run") @@ -590,11 +590,11 @@ def local_abstractconv_check(fgraph, node): ), ): raise LocalMetaOptimizerSkipAssertionError( - f"{node.op.__class__.__name__} Theano optimization failed: there is no implementation " + f"{node.op.__class__.__name__} Aesara optimization failed: there is no implementation " "available supporting the requested options. Did you exclude " 'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, ' "is cuDNN available and does the GPU support it? If on CPU, " - "do you have a BLAS library installed Theano can link against? " + "do you have a BLAS library installed Aesara can link against? 
" "On the CPU we do not support float16." ) diff --git a/theano/tensor/nnet/sigm.py b/aesara/tensor/nnet/sigm.py similarity index 97% rename from theano/tensor/nnet/sigm.py rename to aesara/tensor/nnet/sigm.py index 784d1d6e2b..6d5fc9794f 100644 --- a/theano/tensor/nnet/sigm.py +++ b/aesara/tensor/nnet/sigm.py @@ -11,19 +11,19 @@ import numpy as np -import theano -from theano import printing -from theano import scalar as ts -from theano.configdefaults import config -from theano.graph.opt import PatternSub, copy_stack_trace, local_optimizer -from theano.graph.utils import MethodNotDefined -from theano.printing import pprint -from theano.tensor import basic_opt -from theano.tensor.basic import constant, get_scalar_constant_value -from theano.tensor.elemwise import Elemwise -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.math import add, clip, exp, inv, log, log1p, mul, neg, sub, true_div -from theano.tensor.type import TensorType, values_eq_approx_remove_inf +import aesara +from aesara import printing +from aesara import scalar as ts +from aesara.configdefaults import config +from aesara.graph.opt import PatternSub, copy_stack_trace, local_optimizer +from aesara.graph.utils import MethodNotDefined +from aesara.printing import pprint +from aesara.tensor import basic_opt +from aesara.tensor.basic import constant, get_scalar_constant_value +from aesara.tensor.elemwise import Elemwise +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.math import add, clip, exp, inv, log, log1p, mul, neg, sub, true_div +from aesara.tensor.type import TensorType, values_eq_approx_remove_inf class ScalarSigmoid(ts.UnaryScalarOp): @@ -69,7 +69,7 @@ def c_code(self, node, name, inp, out, sub): # The constants were obtained by looking at the output of # python commands like: # - # import numpy, theano + # import numpy, aesara # dt='float32' # or float64 # for i in range(750): # print i, repr(_asarray(1.0, dtype=dt) / @@ -142,10 +142,10 @@ def gen_graph(): val = 1 / (1 + np.exp(-data)) def hard_sigmoid(x): - return theano.tensor.nnet.hard_sigmoid(x) + return aesara.tensor.nnet.hard_sigmoid(x) def ultra_fast_sigmoid(x): - return theano.tensor.nnet.ultra_fast_sigmoid(x) + return aesara.tensor.nnet.ultra_fast_sigmoid(x) val_hard = hard_sigmoid(data).eval() val_ultra = ultra_fast_sigmoid(data).eval() @@ -162,7 +162,7 @@ def ultra_fast_sigmoid(x): ax.grid(True) ax.legend(("sigmoid", "ultra_fast", "hard"), "upper left") fname = os.path.join( - os.path.dirname(theano.__file__), + os.path.dirname(aesara.__file__), "..", "doc", "library", @@ -270,7 +270,7 @@ def local_ultra_fast_sigmoid(fgraph, node): When enabled, change all sigmoid to ultra_fast_sigmoid. For example do mode.including('local_ultra_fast_sigmoid') - or use the Theano flag optimizer_including=local_ultra_fast_sigmoid. + or use the Aesara flag optimizer_including=local_ultra_fast_sigmoid. This speeds up the sigmoid op by using an approximation. 
@@ -292,7 +292,7 @@ def values_eq_approx_remove_low_prec(a, b): return [out] -theano.compile.optdb["uncanonicalize"].register( +aesara.compile.optdb["uncanonicalize"].register( "local_ultra_fast_sigmoid", local_ultra_fast_sigmoid ) @@ -335,7 +335,7 @@ def values_eq_approx_remove_low_prec(a, b): return [out] -theano.compile.optdb["uncanonicalize"].register( +aesara.compile.optdb["uncanonicalize"].register( "local_hard_sigmoid", local_hard_sigmoid ) @@ -384,7 +384,7 @@ def c_code(self, node, name, inp, out, sub): (z,) = out # These constants were obtained by looking at the output of # python commands like: - # import numpy, theano + # import numpy, aesara # dt='float32' # or float64 # for i in range(750): # print i, repr(numpy.log1p(numpy.exp(_asarray([i,-i], dtype=dt)))) @@ -528,7 +528,7 @@ def is_1pexp(t, only_process_constants=True): if config.warn__identify_1pexp_bug: warnings.warn( "Although your current code is fine, please note that " - "Theano versions prior to 0.5 (more specifically, " + "Aesara versions prior to 0.5 (more specifically, " "prior to commit 7987b51 on 2011-12-18) may have " "yielded an incorrect result. To remove this warning, " "either set the `warn__identify_1pexp_bug` config " diff --git a/theano/tensor/opt_uncanonicalize.py b/aesara/tensor/opt_uncanonicalize.py similarity index 94% rename from theano/tensor/opt_uncanonicalize.py rename to aesara/tensor/opt_uncanonicalize.py index 4321c6de2f..cd04c534f5 100644 --- a/theano/tensor/opt_uncanonicalize.py +++ b/aesara/tensor/opt_uncanonicalize.py @@ -33,17 +33,17 @@ import logging -from theano import scalar as ts -from theano.graph.opt import copy_stack_trace, local_optimizer -from theano.tensor.basic import Alloc, alloc, constant -from theano.tensor.basic_opt import register_uncanonicalize -from theano.tensor.elemwise import CAReduce, DimShuffle -from theano.tensor.math import Argmax, Max, MaxAndArgmax, Min, neg -from theano.tensor.shape import Reshape, reshape -from theano.tensor.subtensor import Subtensor +from aesara import scalar as ts +from aesara.graph.opt import copy_stack_trace, local_optimizer +from aesara.tensor.basic import Alloc, alloc, constant +from aesara.tensor.basic_opt import register_uncanonicalize +from aesara.tensor.elemwise import CAReduce, DimShuffle +from aesara.tensor.math import Argmax, Max, MaxAndArgmax, Min, neg +from aesara.tensor.shape import Reshape, reshape +from aesara.tensor.subtensor import Subtensor -_logger = logging.getLogger("theano.tensor.opt_uncanonicalize") +_logger = logging.getLogger("aesara.tensor.opt_uncanonicalize") @register_uncanonicalize diff --git a/aesara/tensor/random/__init__.py b/aesara/tensor/random/__init__.py new file mode 100644 index 0000000000..d1dc5db0e3 --- /dev/null +++ b/aesara/tensor/random/__init__.py @@ -0,0 +1,3 @@ +# Initialize `RandomVariable` optimizations +import aesara.tensor.random.opt +import aesara.tensor.random.utils diff --git a/theano/tensor/random/basic.py b/aesara/tensor/random/basic.py similarity index 97% rename from theano/tensor/random/basic.py rename to aesara/tensor/random/basic.py index ff2517a520..242bed769b 100644 --- a/theano/tensor/random/basic.py +++ b/aesara/tensor/random/basic.py @@ -1,10 +1,10 @@ import numpy as np import scipy.stats as stats -import theano -from theano.tensor.basic import as_tensor_variable -from theano.tensor.random.op import RandomVariable, default_shape_from_params -from theano.tensor.random.utils import broadcast_params +import aesara +from aesara.tensor.basic import as_tensor_variable +from 
aesara.tensor.random.op import RandomVariable, default_shape_from_params +from aesara.tensor.random.utils import broadcast_params try: @@ -134,7 +134,7 @@ class MvNormalRV(RandomVariable): def __call__(self, mean=None, cov=None, size=None, **kwargs): - dtype = theano.config.floatX if self.dtype == "floatX" else self.dtype + dtype = aesara.config.floatX if self.dtype == "floatX" else self.dtype if mean is None: mean = np.array([0.0], dtype=dtype) @@ -462,10 +462,10 @@ def __call__(self, a, size=None, replace=True, p=None, **kwargs): a = as_tensor_variable(a, ndim=1) if p is None: - p = theano.tensor.type_other.NoneConst.clone() + p = aesara.tensor.type_other.NoneConst.clone() if isinstance(replace, bool): - replace = theano.tensor.constant(np.array(replace)) + replace = aesara.tensor.constant(np.array(replace)) return super().__call__(a, p, replace, size=size, dtype=a.dtype, **kwargs) diff --git a/theano/tensor/random/op.py b/aesara/tensor/random/op.py similarity index 93% rename from theano/tensor/random/op.py rename to aesara/tensor/random/op.py index bd35c25bfb..61f66ea05a 100644 --- a/theano/tensor/random/op.py +++ b/aesara/tensor/random/op.py @@ -3,24 +3,24 @@ import numpy as np -import theano -from theano.assert_op import Assert -from theano.configdefaults import config -from theano.graph.basic import Apply, Variable -from theano.graph.op import Op -from theano.misc.safe_asarray import _asarray -from theano.tensor.basic import ( +import aesara +from aesara.assert_op import Assert +from aesara.configdefaults import config +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import Op +from aesara.misc.safe_asarray import _asarray +from aesara.tensor.basic import ( as_tensor_variable, cast, constant, get_scalar_constant_value, get_vector_length, ) -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.random.type import RandomStateType -from theano.tensor.random.utils import params_broadcast_shapes -from theano.tensor.type import TensorType, all_dtypes, int_dtypes -from theano.tensor.type_other import NoneConst +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.random.type import RandomStateType +from aesara.tensor.random.utils import params_broadcast_shapes +from aesara.tensor.type import TensorType, all_dtypes, int_dtypes +from aesara.tensor.type_other import NoneConst def default_shape_from_params( @@ -39,14 +39,14 @@ def default_shape_from_params( scalars), since that is already definitively handled in the `Op` that calls this. - TODO: Consider using `theano.compile.ops.shape_i` alongside `ShapeFeature`. + TODO: Consider using `aesara.compile.ops.shape_i` alongside `ShapeFeature`. Parameters ---------- ndim_supp: int Total number of dimensions for a single draw of the random variable (e.g. a multivariate normal draw is 1D, so `ndim_supp = 1`). - dist_params: list of `theano.graph.basic.Variable` + dist_params: list of `aesara.graph.basic.Variable` The distribution parameters. param_shapes: list of tuple of `ScalarVariable` (optional) Symbolic shapes for each distribution parameter. These will @@ -117,7 +117,7 @@ def __init__( ``ndims_params = [1, 2]``). dtype: str (optional) The dtype of the sampled output. If the value ``"floatX"`` is - given, then ``dtype`` is set to ``theano.config.floatX``. If + given, then ``dtype`` is set to ``aesara.config.floatX``. If ``None`` (the default), the `dtype` keyword must be set when `RandomVariable.make_node` is called. 
inplace: boolean (optional) @@ -235,7 +235,7 @@ def slice_ind_dims(p, ps, n): # independent variates should broadcast together. p_slices, p_shapes = zip(*params_ind_slice) - shape_ind = theano.tensor.extra_ops.broadcast_shape_iter( + shape_ind = aesara.tensor.extra_ops.broadcast_shape_iter( p_shapes, arrays_are_shapes=True ) @@ -330,13 +330,13 @@ def make_node(self, rng, size, dtype, *dist_params): Parameters ---------- rng: RandomStateType - Existing Theano `RandomState` object to be used. Creates a + Existing Aesara `RandomState` object to be used. Creates a new one, if `None`. size: int or Sequence Numpy-like size of the output (i.e. replications). dtype: str The dtype of the sampled output. If the value ``"floatX"`` is - given, then ``dtype`` is set to ``theano.config.floatX``. This + given, then ``dtype`` is set to ``aesara.config.floatX``. This value is only used when `self.dtype` isn't set. dist_params: list Distribution parameters. @@ -367,7 +367,7 @@ def make_node(self, rng, size, dtype, *dist_params): ) if rng is None: - rng = theano.shared(np.random.RandomState()) + rng = aesara.shared(np.random.RandomState()) elif not isinstance(rng.type, RandomStateType): raise TypeError("The type of rng should be an instance of RandomStateType") @@ -426,7 +426,7 @@ def perform(self, node, inputs, outputs): def grad(self, inputs, outputs): return [ - theano.gradient.grad_undefined( + aesara.gradient.grad_undefined( self, k, inp, "No gradient defined for random variables" ) for k, inp in enumerate(inputs) @@ -478,7 +478,7 @@ def perform(self, node, inputs, out): def grad(self, inputs, outputs): return [ - theano.gradient.grad_undefined( + aesara.gradient.grad_undefined( self, k, inp, "No gradient defined for random variables" ) for k, inp in enumerate(inputs) diff --git a/theano/tensor/random/opt.py b/aesara/tensor/random/opt.py similarity index 95% rename from theano/tensor/random/opt.py rename to aesara/tensor/random/opt.py index 347a65557b..9b605cced7 100644 --- a/theano/tensor/random/opt.py +++ b/aesara/tensor/random/opt.py @@ -1,15 +1,15 @@ -from theano.compile import optdb -from theano.configdefaults import config -from theano.graph.op import compute_test_value -from theano.graph.opt import in2out, local_optimizer -from theano.tensor.basic import constant, get_vector_length -from theano.tensor.elemwise import DimShuffle -from theano.tensor.extra_ops import broadcast_to -from theano.tensor.math import sum as tt_sum -from theano.tensor.random.op import RandomVariable -from theano.tensor.random.utils import broadcast_params -from theano.tensor.shape import Shape -from theano.tensor.subtensor import ( +from aesara.compile import optdb +from aesara.configdefaults import config +from aesara.graph.op import compute_test_value +from aesara.graph.opt import in2out, local_optimizer +from aesara.tensor.basic import constant, get_vector_length +from aesara.tensor.elemwise import DimShuffle +from aesara.tensor.extra_ops import broadcast_to +from aesara.tensor.math import sum as tt_sum +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.random.utils import broadcast_params +from aesara.tensor.shape import Shape +from aesara.tensor.subtensor import ( AdvancedSubtensor, AdvancedSubtensor1, Subtensor, diff --git a/theano/tensor/random/type.py b/aesara/tensor/random/type.py similarity index 95% rename from theano/tensor/random/type.py rename to aesara/tensor/random/type.py index f97268ca02..598bc3c9b9 100644 --- a/theano/tensor/random/type.py +++ b/aesara/tensor/random/type.py @@ -2,8 
+2,8 @@ import numpy as np -import theano -from theano.graph.type import Type +import aesara +from aesara.graph.type import Type class RandomStateType(Type): @@ -65,7 +65,7 @@ def may_share_memory(a, b): # Register `RandomStateType`'s C code for `ViewOp`. -theano.compile.register_view_op_c_code( +aesara.compile.register_view_op_c_code( RandomStateType, """ Py_XDECREF(%(oname)s); diff --git a/theano/tensor/random/utils.py b/aesara/tensor/random/utils.py similarity index 90% rename from theano/tensor/random/utils.py rename to aesara/tensor/random/utils.py index 100ce10e7b..97e913ce6c 100644 --- a/theano/tensor/random/utils.py +++ b/aesara/tensor/random/utils.py @@ -3,13 +3,13 @@ import numpy as np -from theano.compile.sharedvalue import shared -from theano.graph.basic import Variable -from theano.tensor.extra_ops import broadcast_to -from theano.tensor.math import maximum +from aesara.compile.sharedvalue import shared +from aesara.graph.basic import Variable +from aesara.tensor.extra_ops import broadcast_to +from aesara.tensor.math import maximum -def params_broadcast_shapes(param_shapes, ndims_params, use_theano=True): +def params_broadcast_shapes(param_shapes, ndims_params, use_aesara=True): """Broadcast parameters that have different dimensions. Parameters @@ -18,15 +18,15 @@ def params_broadcast_shapes(param_shapes, ndims_params, use_theano=True): The shapes of each parameters to broadcast. ndims_params : list of int The expected number of dimensions for each element in `params`. - use_theano : bool - If ``True``, use Theano `Op`; otherwise, use NumPy. + use_aesara : bool + If ``True``, use Aesara `Op`; otherwise, use NumPy. Returns ======= bcast_shapes : list of ndarray The broadcasted values of `params`. """ - max_fn = maximum if use_theano else max + max_fn = maximum if use_aesara else max rev_extra_dims = [] for ndim_param, param_shape in zip(ndims_params, param_shapes): @@ -78,15 +78,15 @@ def broadcast_params(params, ndims_params): bcast_params : list of ndarray The broadcasted values of `params`. 
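To make the `ndims_params` convention described above concrete, here is a small NumPy-backed sketch using the `broadcast_params` helper from this same module; the printed shapes are what the docstring's description implies rather than a guaranteed output, so treat them as an assumption:

import numpy as np
from aesara.tensor.random.utils import broadcast_params

# mu holds three means, sigma is a (2, 1) batch of scalars.  With
# ndims_params=[0, 0] every dimension counts as an independent batch
# dimension, so both arguments should broadcast to shape (2, 3).
mu = np.array([0.0, 1.0, 2.0])
sigma = np.array([[1.0], [2.0]])
bcast_mu, bcast_sigma = broadcast_params([mu, sigma], ndims_params=[0, 0])
print(bcast_mu.shape, bcast_sigma.shape)   # expected: (2, 3) (2, 3)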
""" - use_theano = False + use_aesara = False param_shapes = [] for p in params: param_shape = p.shape - use_theano |= isinstance(p, Variable) + use_aesara |= isinstance(p, Variable) param_shapes.append(param_shape) - shapes = params_broadcast_shapes(param_shapes, ndims_params, use_theano=use_theano) - broadcast_to_fn = broadcast_to if use_theano else np.broadcast_to + shapes = params_broadcast_shapes(param_shapes, ndims_params, use_aesara=use_aesara) + broadcast_to_fn = broadcast_to if use_aesara else np.broadcast_to bcast_params = [ broadcast_to_fn(param, shape) for shape, param in zip(shapes, params) @@ -116,7 +116,7 @@ class RandomStream: def __init__(self, seed=None, namespace=None): if namespace is None: - from theano.tensor.random import basic # pylint: disable=import-self + from aesara.tensor.random import basic # pylint: disable=import-self self.namespaces = [basic] else: @@ -135,7 +135,7 @@ def __getattr__(self, obj): if ns_obj is None: raise AttributeError("No attribute {}.".format(obj)) - from theano.tensor.random.op import RandomVariable + from aesara.tensor.random.op import RandomVariable if isinstance(ns_obj, RandomVariable): diff --git a/theano/tensor/random/var.py b/aesara/tensor/random/var.py similarity index 85% rename from theano/tensor/random/var.py rename to aesara/tensor/random/var.py index 0be08445f1..45de6d55b1 100644 --- a/theano/tensor/random/var.py +++ b/aesara/tensor/random/var.py @@ -2,8 +2,8 @@ import numpy as np -from theano.compile.sharedvalue import SharedVariable, shared_constructor -from theano.tensor.random.type import random_state_type +from aesara.compile.sharedvalue import SharedVariable, shared_constructor +from aesara.tensor.random.type import random_state_type class RandomStateSharedVariable(SharedVariable): diff --git a/theano/tensor/shape.py b/aesara/tensor/shape.py similarity index 92% rename from theano/tensor/shape.py rename to aesara/tensor/shape.py index e1705aac80..3cf3395878 100644 --- a/theano/tensor/shape.py +++ b/aesara/tensor/shape.py @@ -2,29 +2,29 @@ import numpy as np -import theano -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable -from theano.graph.op import COp -from theano.graph.params_type import ParamsType -from theano.misc.safe_asarray import _asarray -from theano.scalar import int32 -from theano.tensor import basic as tt -from theano.tensor.exceptions import NotScalarConstantError -from theano.tensor.type import TensorType, int_dtypes, tensor -from theano.tensor.var import TensorConstant, TensorVariable +import aesara +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import COp +from aesara.graph.params_type import ParamsType +from aesara.misc.safe_asarray import _asarray +from aesara.scalar import int32 +from aesara.tensor import basic as tt +from aesara.tensor.exceptions import NotScalarConstantError +from aesara.tensor.type import TensorType, int_dtypes, tensor +from aesara.tensor.var import TensorConstant, TensorVariable def register_shape_c_code(type, code, version=()): """ - Tell Shape Op how to generate C code for a Theano Type. + Tell Shape Op how to generate C code for an Aesara Type. Parameters ---------- - typ : Theano type - It must be the Theano class itself and not an instance of the class. + typ : Aesara type + It must be the Aesara class itself and not an instance of the class. code : C code - Returns a vector representing the shape for the Theano type 'typ'. 
+ Returns a vector representing the shape for the Aesara type 'typ'. Use %(iname)s and %(oname)s for the input and output C variable names respectively. version @@ -59,7 +59,7 @@ def make_node(self, x): # This will fail at execution time. if not isinstance(x, Variable): x = tt.as_tensor_variable(x) - return Apply(self, [x], [theano.tensor.type.lvector()]) + return Apply(self, [x], [aesara.tensor.type.lvector()]) def perform(self, node, inp, out_): (x,) = inp @@ -83,7 +83,7 @@ def grad(self, inp, grads): # the elements of the tensor variable do not participate # in the computation of the shape, so they are not really # part of the graph - return [theano.gradient.DisconnectedType()()] + return [aesara.gradient.DisconnectedType()()] def R_op(self, inputs, eval_points): return [None] @@ -153,22 +153,22 @@ def __init__(self, i): # As i will be used in the hash and that ndarray are not hashable, # we need to convert it to an int as it is hashable. if isinstance(i, np.ndarray): - assert i.dtype in theano.tensor.type.integer_dtypes + assert i.dtype in aesara.tensor.type.integer_dtypes assert i == int(i) i = int(i) self.i = i # NB: # 1) params_type is defined as a property to avoid - # loop in Python import caused by importing theano.scalar below + # loop in Python import caused by importing aesara.scalar below # when params_type is defined directly in class code. # 2) We wrap scalar into ParamsType (instead of directly using scalar as op param) - # to avoid Theano converting scalar param to constant that would be later + # to avoid Aesara converting scalar param to constant that would be later # hardcoded as litteral in C code, making us loose all the advantages of # using params. @property def params_type(self): - return ParamsType(i=theano.scalar.basic.int64) + return ParamsType(i=aesara.scalar.basic.int64) def __str__(self): return "%s{%i}" % (self.__class__.__name__, self.i) @@ -178,7 +178,7 @@ def make_node(self, x): raise TypeError("x must be Variable with ndim attribute", x) if x.ndim <= self.i: raise TypeError("x has too few dimensions for Shape_i", (x, self.i)) - return Apply(self, [x], [theano.tensor.type.lscalar()]) + return Apply(self, [x], [aesara.tensor.type.lscalar()]) def perform(self, node, inp, out_, params): (x,) = inp @@ -238,7 +238,7 @@ def connection_pattern(self, node): def grad(self, inp, grads): return [ - theano.gradient.grad_not_implemented( + aesara.gradient.grad_not_implemented( op=self, x_pos=0, x=inp[0], @@ -301,14 +301,14 @@ def shape_i_op(i): def register_shape_i_c_code(typ, code, check_input, version=()): """ - Tell Shape_i how to generate C code for a Theano Type. + Tell Shape_i how to generate C code for an Aesara Type. Parameters ---------- - typ : Theano type - It must be the Theano class itself and not an instance of the class. + typ : Aesara type + It must be the Aesara class itself and not an instance of the class. code : C code - Gets the shape of dimensions %(i)s for the Theano type 'typ'. + Gets the shape of dimensions %(i)s for the Aesara type 'typ'. Use %(iname)s and %(oname)s for the input and output C variable names respectively. version @@ -320,14 +320,14 @@ def register_shape_i_c_code(typ, code, check_input, version=()): def register_specify_shape_c_code(typ, code, version=(), c_support_code_apply=None): """ - Tell SpecifyShape how to generate C code for a Theano Type. + Tell SpecifyShape how to generate C code for an Aesara Type. Parameters ---------- - typ : Theano type - It must be the Theano class itself and not an instance of the class. 
+ typ : Aesara type + It must be the Aesara class itself and not an instance of the class. code : C code - Checks the shape and returns a view for the Theano type 'typ'. + Checks the shape and returns a view for the Aesara type 'typ'. Use %(iname)s and %(oname)s for the input and output C variable names respectively. %(shape)s is the vector of shape of %(iname)s. Check that its length is good. @@ -373,7 +373,7 @@ def make_node(self, x, shape): shape = tt.as_tensor_variable(shape) if shape.ndim > 1: raise AssertionError() - if shape.dtype not in theano.tensor.type.integer_dtypes: + if shape.dtype not in aesara.tensor.type.integer_dtypes: raise AssertionError() if isinstance(shape, TensorConstant) and shape.data.size != x.ndim: raise AssertionError() @@ -413,8 +413,8 @@ def grad(self, inp, grads): # to remove that op from the graph to don't block other optimization # Should I do an optimizer that will remove the SpecifyShape? # I think Yes - return [gz, theano.gradient.DisconnectedType()()] - return [specify_shape(gz, s), theano.gradient.DisconnectedType()()] + return [gz, aesara.gradient.DisconnectedType()()] + return [specify_shape(gz, s), aesara.gradient.DisconnectedType()()] def R_op(self, inputs, eval_points): if eval_points[0] is None: @@ -556,7 +556,7 @@ def R_op(self, inputs, eval_points): return self(eval_points[0], *inputs[1:], **dict(return_list=True)) def infer_shape(self, fgraph, node, ishapes): - from theano.tensor.math import eq, maximum, mul + from aesara.tensor.math import eq, maximum, mul # inputs[1] can contain at most one value of '-1', meaning the actual # shape of the output will be automatically computed by reshape, so @@ -684,7 +684,7 @@ def reshape(x, newshape, ndim=None): except ValueError: raise ValueError( f"The length of the provided shape ({newshape}) cannot " - "be automatically determined, so Theano is not able " + "be automatically determined, so Aesara is not able " "to know what the number of dimensions of the reshaped " "variable will be. You can provide the 'ndim' keyword " "argument to 'reshape' to avoid this problem." 
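The ``ndim`` requirement described in the error message above is easiest to see with a
symbolic target shape. A minimal sketch, assuming the ``aesara.tensor.shape`` and
``aesara.tensor.type`` layout introduced in this diff (the variable names are illustrative):

.. code:: python

    from aesara.tensor.shape import reshape
    from aesara.tensor.type import matrix, vector

    x = matrix("x")
    # A symbolic shape vector: its length is unknown while the graph is built,
    # so Aesara cannot infer the rank of the reshaped result on its own.
    target = vector("target", dtype="int64")
    y = reshape(x, target, ndim=3)  # `ndim` states the output rank explicitly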
@@ -731,14 +731,14 @@ def shape_padaxis(t, axis): Examples -------- - >>> tensor = theano.tensor.type.tensor3() - >>> theano.tensor.shape_padaxis(tensor, axis=0) + >>> tensor = aesara.tensor.type.tensor3() + >>> aesara.tensor.shape_padaxis(tensor, axis=0) DimShuffle{x,0,1,2}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=1) + >>> aesara.tensor.shape_padaxis(tensor, axis=1) DimShuffle{0,x,1,2}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=3) + >>> aesara.tensor.shape_padaxis(tensor, axis=3) DimShuffle{0,1,2,x}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=-1) + >>> aesara.tensor.shape_padaxis(tensor, axis=-1) DimShuffle{0,1,2,x}.0 See Also diff --git a/theano/tensor/sharedvar.py b/aesara/tensor/sharedvar.py similarity index 90% rename from theano/tensor/sharedvar.py rename to aesara/tensor/sharedvar.py index 780226d09c..99b19c5d1b 100644 --- a/theano/tensor/sharedvar.py +++ b/aesara/tensor/sharedvar.py @@ -2,10 +2,10 @@ import numpy as np -from theano.compile import SharedVariable, shared_constructor -from theano.misc.safe_asarray import _asarray -from theano.tensor.type import TensorType -from theano.tensor.var import _tensor_py_operators +from aesara.compile import SharedVariable, shared_constructor +from aesara.misc.safe_asarray import _asarray +from aesara.tensor.type import TensorType +from aesara.tensor.var import _tensor_py_operators def load_shared_variable(val): @@ -70,7 +70,7 @@ def tensor_constructor( # _tensor_py_operators is first to have its version of __{gt,ge,lt,le}__ # # N.B. THERE IS ANOTHER CLASS CALLED ScalarSharedVariable in the -# theano.scalar.sharedvar file. It is not registered as a shared_constructor, +# aesara.scalar.sharedvar file. It is not registered as a shared_constructor, # this one is. class ScalarSharedVariable(_tensor_py_operators, SharedVariable): pass @@ -89,7 +89,7 @@ def scalar_constructor( We ignore the borrow parameter as we convert ``value`` to an ndarray (this is a new object). This respects the semantic of - borrow, as it is a hint to Theano that we can reuse it. + borrow, as it is a hint to Aesara that we can reuse it. """ if target != "cpu": diff --git a/theano/tensor/signal/__init__.py b/aesara/tensor/signal/__init__.py similarity index 100% rename from theano/tensor/signal/__init__.py rename to aesara/tensor/signal/__init__.py diff --git a/theano/tensor/signal/conv.py b/aesara/tensor/signal/conv.py similarity index 87% rename from theano/tensor/signal/conv.py rename to aesara/tensor/signal/conv.py index 05b1910613..c70a2f565b 100644 --- a/theano/tensor/signal/conv.py +++ b/aesara/tensor/signal/conv.py @@ -7,16 +7,16 @@ import logging import warnings -from theano import tensor as tt -from theano.configdefaults import config -from theano.tensor.nnet import conv -from theano.tensor.shape import reshape +from aesara import tensor as tt +from aesara.configdefaults import config +from aesara.tensor.nnet import conv +from aesara.tensor.shape import reshape __docformat__ = "restructuredtext en" -_logger = logging.getLogger("theano.tensor.signal.conv") +_logger = logging.getLogger("aesara.tensor.signal.conv") def conv2d( @@ -38,9 +38,9 @@ def conv2d( Parameters ---------- - input : Symbolic theano tensor for images to be filtered. + input : Symbolic aesara tensor for images to be filtered. Dimensions: ([num_images], image height, image width) - filters : Symbolic theano tensor for convolution filter(s). + filters : Symbolic aesara tensor for convolution filter(s). 
Dimensions: ([num_filters], filter height, filter width) border_mode: {'valid', 'full'} See scipy.signal.convolve2d. @@ -51,7 +51,7 @@ def conv2d( filter_shape : tuple of length 2 or 3 ([num_filters,] filter height, filter width). kwargs - See theano.tensor.nnet.conv.conv2d. + See aesara.tensor.nnet.conv.conv2d. Returns ------- @@ -115,8 +115,8 @@ def conv2d( if input.ndim == 2 and filters.ndim == 2: if config.warn__signal_conv2d_interface: warnings.warn( - "theano.tensor.signal.conv2d() now outputs a 2d tensor when both" - " inputs are 2d. To disable this warning, set the Theano flag" + "aesara.tensor.signal.conv2d() now outputs a 2d tensor when both" + " inputs are 2d. To disable this warning, set the Aesara flag" " warn__signal_conv2d_interface to False", stacklevel=3, ) diff --git a/theano/tensor/signal/pool.py b/aesara/tensor/signal/pool.py similarity index 98% rename from theano/tensor/signal/pool.py rename to aesara/tensor/signal/pool.py index 2341797e77..233d2f4fc5 100755 --- a/theano/tensor/signal/pool.py +++ b/aesara/tensor/signal/pool.py @@ -8,16 +8,16 @@ import numpy as np -import theano.tensor.basic as tt -import theano.tensor.math as tm -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import OpenMPOp -from theano.graph.params_type import ParamsType -from theano.graph.type import EnumList -from theano.graph.utils import MethodNotDefined -from theano.scalar import bool as bool_t -from theano.tensor.type import TensorType, int_dtypes +import aesara.tensor.basic as tt +import aesara.tensor.math as tm +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import OpenMPOp +from aesara.graph.params_type import ParamsType +from aesara.graph.type import EnumList +from aesara.graph.utils import MethodNotDefined +from aesara.scalar import bool as bool_t +from aesara.tensor.type import TensorType, int_dtypes def max_pool_2d_same_size(input, patch_size): @@ -29,9 +29,9 @@ def max_pool_2d_same_size(input, patch_size): Parameters ---------- - input : 4-D theano tensor of input images + input : 4-D aesara tensor of input images Input images. Max pooling will be done over the 2 last dimensions. - patch_size : tuple of length 2 or theano vector of ints of size 2. + patch_size : tuple of length 2 or aesara vector of ints of size 2. Size of the patch (patch height, patch width). (2,2) will retain only one non-zero value per patch of 4 values. @@ -60,20 +60,20 @@ def pool_2d( Parameters ---------- - input : N-D theano tensor of input images + input : N-D aesara tensor of input images Input images. Max pooling will be done over the 2 last dimensions. - ws : tuple of length 2 or theano vector of ints of size 2. + ws : tuple of length 2 or aesara vector of ints of size 2. Factor by which to downscale (vertical ws, horizontal ws). (2,2) will halve the image in each dimension. ignore_border : bool (default None, will print a warning and set to False) When True, (5,5) input with ws=(2,2) will generate a (2,2) output. (3,3) otherwise. - stride : tuple of two ints or theano vector of ints of size 2. + stride : tuple of two ints or aesara vector of ints of size 2. Stride size, which is the number of shifts over rows/cols to get the next pool region. If stride is None, it is considered equal to ws (no overlap on pooling regions), eg: stride=(1,1) will shifts over one row and one col for every iteration. 
- pad : tuple of two ints or theano vector of ints of size 2. + pad : tuple of two ints or aesara vector of ints of size 2. (pad_h, pad_w), pad zeros to extend beyond four borders of the images, pad_h is the size of the top and bottom margins, and pad_w is the size of the left and right margins. @@ -146,7 +146,7 @@ def pool_2d( warnings.warn( "pool_2d() will have the parameter ignore_border" " default value changed to True (currently" - " False). To have consistent behavior with all Theano" + " False). To have consistent behavior with all Aesara" " version, explicitly add the parameter ignore_border=True." " On the GPU, using ignore_border=True is needed to use cuDNN." " When using ignore_border=False and not using cuDNN, the only" @@ -181,19 +181,19 @@ def pool_3d( Parameters ---------- - input : N-D theano tensor of input images + input : N-D aesara tensor of input images Input images. Max pooling will be done over the 3 last dimensions. - ws : tuple of length 3 or theano vector of ints of size 3 + ws : tuple of length 3 or aesara vector of ints of size 3 Factor by which to downscale (vertical ws, horizontal ws, depth ws). (2,2,2) will halve the image in each dimension. ignore_border : bool (default None, will print a warning and set to False) When True, (5,5,5) input with ws=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise. - st : tuple of three ints or theano vector of ints of size 3 + st : tuple of three ints or aesara vector of ints of size 3 Stride size, which is the number of shifts over rows/cols/slices to get the next pool region. If st is None, it is considered equal to ws (no overlap on pooling regions). - pad : tuple of two ints or theano vector of ints of size 3 + pad : tuple of two ints or aesara vector of ints of size 3 (pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the images, pad_h is the size of the top and bottom margins, pad_w is the size of the left and right margins, and pad_d is the size @@ -267,7 +267,7 @@ def pool_3d( warnings.warn( "pool_3d() will have the parameter ignore_border" " default value changed to True (currently" - " False). To have consistent behavior with all Theano" + " False). To have consistent behavior with all Aesara" " version, explicitly add the parameter ignore_border=True." " On the GPU, using ignore_border=True is needed to use cuDNN." " When using ignore_border=False and not using cuDNN, the only" @@ -286,7 +286,7 @@ def pool_3d( # NB: This enum type is currently used in gpuarray/pool.py. # It may be used later as op param in this current file. # Enum name and constants names are inspired from cuDNN type `cudnnPoolingMode_t` -# (cf. `theano/gpuarray/cudnn_defs.py`). +# (cf. `aesara/gpuarray/cudnn_defs.py`). PoolingMode_t = EnumList( ("POOLING_MAX", "max"), ("POOLING_SUM", "sum"), @@ -355,7 +355,7 @@ def out_shape( Parameters ---------- - imgshape : tuple, list, or similar of integer or scalar Theano variable + imgshape : tuple, list, or similar of integer or scalar Aesara variable The shape of a tensor of images. The last N elements are interpreted as the number of rows, and the number of cols. ws : list or tuple of N ints @@ -1001,7 +1001,7 @@ def out_shape( Parameters ---------- - imgshape : tuple of integers or scalar Theano variables + imgshape : tuple of integers or scalar Aesara variables the shape of a tensor of images. The last N elements are interpreted as the downsampling dimensions. 
ws : tuple of N ints diff --git a/theano/tensor/slinalg.py b/aesara/tensor/slinalg.py similarity index 95% rename from theano/tensor/slinalg.py rename to aesara/tensor/slinalg.py index 0d90c823ae..2e77c55e7e 100644 --- a/theano/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -12,13 +12,13 @@ # some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work imported_scipy = False -import theano.tensor -import theano.tensor.basic as tt -import theano.tensor.math as tm -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.tensor import as_tensor_variable -from theano.tensor.type import matrix, tensor, vector +import aesara.tensor +import aesara.tensor.basic as tt +import aesara.tensor.math as tm +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.tensor import as_tensor_variable +from aesara.tensor.type import matrix, tensor, vector logger = logging.getLogger(__name__) @@ -322,12 +322,12 @@ def L_op(self, inputs, outputs, output_gradients): """ # lower and upper triangular solves solve_lower_triangular = Solve(A_structure="lower_triangular", lower=True) -"""Optimized implementation of :func:`theano.tensor.slinalg.solve` when A is lower triangular.""" +"""Optimized implementation of :func:`aesara.tensor.slinalg.solve` when A is lower triangular.""" solve_upper_triangular = Solve(A_structure="upper_triangular", lower=False) -"""Optimized implementation of :func:`theano.tensor.slinalg.solve` when A is upper triangular.""" +"""Optimized implementation of :func:`aesara.tensor.slinalg.solve` when A is upper triangular.""" # symmetric solves solve_symmetric = Solve(A_structure="symmetric") -"""Optimized implementation of :func:`theano.tensor.slinalg.solve` when A is symmetric.""" +"""Optimized implementation of :func:`aesara.tensor.slinalg.solve` when A is symmetric.""" # TODO: Optimizations to replace multiplication by matrix inverse # with solve() Op (still unwritten) @@ -350,11 +350,11 @@ def make_node(self, a, b): imported_scipy ), "Scipy not available. Scipy is needed for the Eigvalsh op" - if b == theano.tensor.type_other.NoneConst: + if b == aesara.tensor.type_other.NoneConst: a = as_tensor_variable(a) assert a.ndim == 2 - out_dtype = theano.scalar.upcast(a.dtype) + out_dtype = aesara.scalar.upcast(a.dtype) w = vector(dtype=out_dtype) return Apply(self, [a], [w]) else: @@ -363,7 +363,7 @@ def make_node(self, a, b): assert a.ndim == 2 assert b.ndim == 2 - out_dtype = theano.scalar.upcast(a.dtype, b.dtype) + out_dtype = aesara.scalar.upcast(a.dtype, b.dtype) w = vector(dtype=out_dtype) return Apply(self, [a, b], [w]) @@ -392,7 +392,7 @@ class EigvalshGrad(Op): """ # Note: This Op (EigvalshGrad), should be removed and replaced with a graph - # of theano ops that is constructed directly in Eigvalsh.grad. + # of aesara ops that is constructed directly in Eigvalsh.grad. # But this can only be done once scipy.linalg.eigh is available as an Op # (currently the Eigh uses numpy.linalg.eigh, which doesn't let you # pass the right-hand-side matrix for a generalized eigenproblem.) 
See the @@ -422,7 +422,7 @@ def make_node(self, a, b, gw): assert b.ndim == 2 assert gw.ndim == 1 - out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype) + out_dtype = aesara.scalar.upcast(a.dtype, b.dtype, gw.dtype) out1 = matrix(dtype=out_dtype) out2 = matrix(dtype=out_dtype) return Apply(self, [a, b, gw], [out1, out2]) diff --git a/theano/tensor/sort.py b/aesara/tensor/sort.py similarity index 97% rename from theano/tensor/sort.py rename to aesara/tensor/sort.py index a2632bd536..9da573dcca 100644 --- a/theano/tensor/sort.py +++ b/aesara/tensor/sort.py @@ -1,14 +1,14 @@ import numpy as np -from theano.gradient import grad_undefined -from theano.graph.basic import Apply, Constant -from theano.graph.op import Op -from theano.misc.safe_asarray import _asarray -from theano.tensor.basic import arange, as_tensor_variable, flatten, switch -from theano.tensor.math import eq, ge, mul -from theano.tensor.shape import shape -from theano.tensor.subtensor import set_subtensor -from theano.tensor.type import TensorType, integer_dtypes +from aesara.gradient import grad_undefined +from aesara.graph.basic import Apply, Constant +from aesara.graph.op import Op +from aesara.misc.safe_asarray import _asarray +from aesara.tensor.basic import arange, as_tensor_variable, flatten, switch +from aesara.tensor.math import eq, ge, mul +from aesara.tensor.shape import shape +from aesara.tensor.subtensor import set_subtensor +from aesara.tensor.type import TensorType, integer_dtypes def _variable_is_none(var): diff --git a/theano/tensor/subtensor.py b/aesara/tensor/subtensor.py similarity index 97% rename from theano/tensor/subtensor.py rename to aesara/tensor/subtensor.py index 1a6cba455b..804f9f7857 100644 --- a/theano/tensor/subtensor.py +++ b/aesara/tensor/subtensor.py @@ -6,27 +6,27 @@ import numpy as np -import theano -from theano import scalar as ts -from theano.configdefaults import config -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Variable -from theano.graph.op import COp, Op -from theano.graph.params_type import ParamsType -from theano.graph.type import CType -from theano.graph.utils import MethodNotDefined -from theano.misc.safe_asarray import _asarray -from theano.printing import pprint -from theano.tensor.basic import addbroadcast, alloc, get_scalar_constant_value -from theano.tensor.elemwise import DimShuffle -from theano.tensor.exceptions import ( +import aesara +from aesara import scalar as ts +from aesara.configdefaults import config +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Variable +from aesara.graph.op import COp, Op +from aesara.graph.params_type import ParamsType +from aesara.graph.type import CType +from aesara.graph.utils import MethodNotDefined +from aesara.misc.safe_asarray import _asarray +from aesara.printing import pprint +from aesara.tensor.basic import addbroadcast, alloc, get_scalar_constant_value +from aesara.tensor.elemwise import DimShuffle +from aesara.tensor.exceptions import ( AdvancedIndexingError, NotScalarConstantError, ShapeError, ) -from theano.tensor.math import clip -from theano.tensor.shape import Reshape -from theano.tensor.type import ( +from aesara.tensor.math import clip +from aesara.tensor.shape import Reshape +from aesara.tensor.type import ( TensorType, bscalar, complex_dtypes, @@ -41,10 +41,10 @@ wscalar, zscalar, ) -from theano.tensor.type_other import NoneConst, NoneTypeT, SliceType, make_slice +from aesara.tensor.type_other import NoneConst, NoneTypeT, SliceType, 
make_slice -_logger = logging.getLogger("theano.tensor.subtensor") +_logger = logging.getLogger("aesara.tensor.subtensor") invalid_scal_types = (ts.float64, ts.float32, ts.float16) scal_types = (ts.int64, ts.int32, ts.int16, ts.int8) @@ -63,7 +63,7 @@ def as_index_constant(a): - """Convert Python literals to Theano constants--when possible--in Subtensor arguments. + """Convert Python literals to Aesara constants--when possible--in Subtensor arguments. This will leave `Variable`s untouched. """ @@ -78,7 +78,7 @@ def as_index_constant(a): elif isinstance(a, (int, np.integer)): return ts.ScalarConstant(ts.int64, a) elif not isinstance(a, Variable): - return theano.tensor.as_tensor(a) + return aesara.tensor.as_tensor(a) else: return a @@ -128,7 +128,7 @@ def get_canonical_form_slice(theslice, length): if the resulting set of numbers needs to be reversed or not. """ - from theano.tensor import extract_constant, ge, lt, sgn, switch + from aesara.tensor import extract_constant, ge, lt, sgn, switch if isinstance(theslice, slice): @@ -290,7 +290,7 @@ def range_len(slc): Adapted from CPython. """ - from theano.tensor import and_, gt, lt, switch + from aesara.tensor import and_, gt, lt, switch start, stop, step = tuple( as_index_constant(a) for a in [slc.start, slc.stop, slc.step] @@ -414,7 +414,7 @@ def indexed_result_shape(array_shape, indices, indices_are_shapes=False): """ res_shape = () - remaining_dims = range(theano.tensor.basic.get_vector_length(array_shape)) + remaining_dims = range(aesara.tensor.basic.get_vector_length(array_shape)) idx_groups = group_indices(indices) if len(idx_groups) > 2 or len(idx_groups) > 1 and not idx_groups[0][0]: @@ -433,7 +433,7 @@ def indexed_result_shape(array_shape, indices, indices_are_shapes=False): grp_shapes = tuple(array_shape[dim] for dim in dim_nums) res_shape += basic_shape(grp_shapes, grp_indices) else: - from theano.tensor.extra_ops import broadcast_shape + from aesara.tensor.extra_ops import broadcast_shape res_shape += broadcast_shape( *grp_indices, arrays_are_shapes=indices_are_shapes @@ -550,7 +550,7 @@ def convert(entry, slice_ok=True): elif isinstance(entry, (int, np.integer)): # Disallow the use of python scalars in idx_list raise TypeError( - "Python scalar in idx_list." "Please report this error to theano-dev." + "Python scalar in idx_list." "Please report this error to aesara-dev." ) else: raise AdvancedIndexingError("Invalid index type or slice for Subtensor") @@ -578,7 +578,7 @@ def get_constant_idx( Examples -------- - Example usage where v, a are appropriately typed theano variables : + Example usage where v, a are appropriately typed aesara variables : >>> b = a[v, 1:3] >>> b.owner.op.idx_list (Scalar(int64), slice(Scalar(int64), Scalar(int64), None)) @@ -619,7 +619,7 @@ def my_as_scalar(a): # create a circular import) , this method converts either a # TensorVariable or a ScalarVariable to a scalar. if isinstance(a, Variable) and isinstance(a.type, TensorType): - return theano.tensor.scalar_from_tensor(a) + return aesara.tensor.scalar_from_tensor(a) else: return ts.as_scalar(a) @@ -630,10 +630,10 @@ def make_node(self, x, *inputs): x The tensor to take a subtensor of. inputs - A list of theano Scalars. + A list of aesara Scalars. """ - x = theano.tensor.as_tensor_variable(x) + x = aesara.tensor.as_tensor_variable(x) inputs = tuple(self.my_as_scalar(a) for a in inputs) idx_list = list(self.idx_list) @@ -741,7 +741,7 @@ def grad(self, inputs, grads): # This allow the opt local_IncSubtensor_serialize to apply first. 
# We have an optimization that will convert this to a # set subtensor here at: - # theano/tensor/opt.py:local_incsubtensor_of_zeros_to_setsubtensor() + # aesara/tensor/opt.py:local_incsubtensor_of_zeros_to_setsubtensor() first = IncSubtensor(self.idx_list)(x.zeros_like(), gz, *rest) return [first] + [DisconnectedType()()] * len(rest) @@ -1228,7 +1228,7 @@ def inc_subtensor( y The amount by which to increment the subtensor in question. inplace - Don't use. Theano will do it when possible. + Don't use. Aesara will do it when possible. set_instead_of_inc If True, do a set_subtensor instead. tolerate_inplace_aliasing: @@ -1248,8 +1248,8 @@ def inc_subtensor( # First of all, y cannot have a higher dimension than x, # nor have non-broadcastable dimensions where x is broadcastable. - x = theano.tensor.as_tensor_variable(x) - y = theano.tensor.as_tensor_variable(y) + x = aesara.tensor.as_tensor_variable(x) + y = aesara.tensor.as_tensor_variable(y) if y.ndim > x.ndim: raise TypeError( @@ -1464,7 +1464,7 @@ def make_node(self, x, y, *inputs): inputs: TODO WRITEME """ - x, y = map(theano.tensor.as_tensor_variable, [x, y]) + x, y = map(aesara.tensor.as_tensor_variable, [x, y]) if y.ndim > x.ndim: raise ValueError( f"Trying to increment a {int(x.ndim)}-dimensional " @@ -1807,7 +1807,7 @@ def grad(self, inputs, grads): if self.set_instead_of_inc: gx = set_subtensor( Subtensor(idx_list=self.idx_list)(g_output, *idx_list), - theano.tensor.zeros_like(y), + aesara.tensor.zeros_like(y), ) else: gx = g_output @@ -1835,7 +1835,7 @@ def _sum_grad_over_bcasted_dims(x, gx): if gx.broadcastable[i] is False and x_broad[i] is True: axis_to_sum.append(i) elif gx.broadcastable[i] is True and x_broad[i] is False: - # This means that Theano was able to infer that + # This means that Aesara was able to infer that # gx.shape[i] is 1, so x.shape[i] is 1, but we # didn't know it. It is fine. pass @@ -1867,8 +1867,8 @@ def __init__(self, sparse_grad=False): self.sparse_grad = sparse_grad def make_node(self, x, ilist): - x_ = theano.tensor.as_tensor_variable(x) - ilist_ = theano.tensor.as_tensor_variable(ilist) + x_ = aesara.tensor.as_tensor_variable(x) + ilist_ = aesara.tensor.as_tensor_variable(ilist) if ilist_.type.dtype not in integer_dtypes: raise TypeError("index must be integers") if ilist_.type.ndim != 1: @@ -1928,7 +1928,7 @@ def grad(self, inputs, grads): " from a tensor with ndim != 2. 
ndim is " + str(x.type.ndim) ) - rval1 = [theano.sparse.construct_sparse_from_list(x, gz, ilist)] + rval1 = [aesara.sparse.construct_sparse_from_list(x, gz, ilist)] else: if x.dtype in discrete_dtypes: # The output dtype is the same as x @@ -2087,9 +2087,9 @@ def __str__(self): return self.__class__.__name__ + "{%s}" % msg def make_node(self, x, y, ilist): - x_ = theano.tensor.as_tensor_variable(x) - y_ = theano.tensor.as_tensor_variable(y) - ilist_ = theano.tensor.as_tensor_variable(ilist) + x_ = aesara.tensor.as_tensor_variable(x) + y_ = aesara.tensor.as_tensor_variable(y) + ilist_ = aesara.tensor.as_tensor_variable(ilist) if ilist_.type.dtype not in integer_dtypes: raise TypeError("index must be integers") @@ -2440,7 +2440,7 @@ def as_index_variable(idx): return idx if isinstance(idx, Variable) and isinstance(idx.type, NoneTypeT): return idx - idx = theano.tensor.as_tensor_variable(idx) + idx = aesara.tensor.as_tensor_variable(idx) if idx.type.dtype not in discrete_dtypes: raise TypeError("index must be integers or a boolean mask") return idx @@ -2479,7 +2479,7 @@ class AdvancedSubtensor(Op): __props__ = () def make_node(self, x, *index): - x = theano.tensor.as_tensor_variable(x) + x = aesara.tensor.as_tensor_variable(x) index = tuple(map(as_index_variable, index)) # We only want the broadcast information, and we don't need recursive @@ -2494,7 +2494,7 @@ def make_node(self, x, *index): bcast_index = tuple( chain.from_iterable( - theano.tensor.basic.nonzero(idx) + aesara.tensor.basic.nonzero(idx) if getattr(idx, "ndim", 0) > 0 and getattr(idx, "dtype", None) == "bool" else (idx,) @@ -2602,13 +2602,13 @@ def __str__(self): ) def make_node(self, x, y, *inputs): - x = theano.tensor.as_tensor_variable(x) - y = theano.tensor.as_tensor_variable(y) + x = aesara.tensor.as_tensor_variable(x) + y = aesara.tensor.as_tensor_variable(y) new_inputs = [] for inp in inputs: if isinstance(inp, (list, tuple)): - inp = theano.tensor.as_tensor_variable(inp) + inp = aesara.tensor.as_tensor_variable(inp) new_inputs.append(inp) return Apply( self, @@ -2701,8 +2701,8 @@ def take(a, indices, axis=None, mode="raise"): input array is used. """ - a = theano.tensor.as_tensor_variable(a) - indices = theano.tensor.as_tensor_variable(indices) + a = aesara.tensor.as_tensor_variable(a) + indices = aesara.tensor.as_tensor_variable(indices) # Reuse advanced_subtensor1 if indices is a vector if indices.ndim == 1: if mode == "clip": @@ -2729,11 +2729,11 @@ def take(a, indices, axis=None, mode="raise"): else: # If axis is 0, don't generate a useless concatenation. 
if axis == 0: - shape = theano.tensor.concatenate([indices.shape, a.shape[axis + 1 :]]) + shape = aesara.tensor.concatenate([indices.shape, a.shape[axis + 1 :]]) else: if axis < 0: axis += a.ndim - shape = theano.tensor.concatenate( + shape = aesara.tensor.concatenate( [a.shape[:axis], indices.shape, a.shape[axis + 1 :]] ) ndim = a.ndim + indices.ndim - 1 diff --git a/theano/tensor/type.py b/aesara/tensor/type.py similarity index 96% rename from theano/tensor/type.py rename to aesara/tensor/type.py index 1294acb769..91f122f6cb 100644 --- a/theano/tensor/type.py +++ b/aesara/tensor/type.py @@ -3,16 +3,16 @@ import numpy as np -import theano -from theano import scalar as ts -from theano.configdefaults import config -from theano.graph.basic import Variable -from theano.graph.type import CType -from theano.misc.safe_asarray import _asarray -from theano.utils import apply_across_args +import aesara +from aesara import scalar as ts +from aesara.configdefaults import config +from aesara.graph.basic import Variable +from aesara.graph.type import CType +from aesara.misc.safe_asarray import _asarray +from aesara.utils import apply_across_args -_logger = logging.getLogger("theano.tensor.type") +_logger = logging.getLogger("aesara.tensor.type") # Define common subsets of dtypes (as strings). @@ -71,7 +71,7 @@ def __init__(self, dtype, broadcastable, name=None, sparse_grad=False): warnings.warn( "You use an old interface to" " AdvancedSubtensor1 sparse_grad. Now use" - " theano.sparse.sparse_grad(a_tensor[an_int_vector]).", + " aesara.sparse.sparse_grad(a_tensor[an_int_vector]).", category=DeprecationWarning, ) @@ -201,7 +201,7 @@ def filter(self, data, strict=False, allow_downcast=None): msg = "" raise TypeError( "The numpy.ndarray object is not aligned." - " Theano C code does not support that.", + " Aesara C code does not support that.", msg, "object shape", data.shape, @@ -287,8 +287,8 @@ def dtype_specs(self): "int32": (int, "npy_int32", "NPY_INT32"), "uint64": (int, "npy_uint64", "NPY_UINT64"), "int64": (int, "npy_int64", "NPY_INT64"), - "complex128": (complex, "theano_complex128", "NPY_COMPLEX128"), - "complex64": (complex, "theano_complex64", "NPY_COMPLEX64"), + "complex128": (complex, "aesara_complex128", "NPY_COMPLEX128"), + "complex64": (complex, "aesara_complex64", "NPY_COMPLEX64"), }[self.dtype] except KeyError: raise TypeError( @@ -319,7 +319,7 @@ def convert_variable(self, var): for sb, ob in zip(self.broadcastable, var.type.broadcastable) ) ): - return theano.tensor.patternbroadcast(var, self.broadcastable) + return aesara.tensor.patternbroadcast(var, self.broadcastable) @staticmethod def may_share_memory(a, b): @@ -623,7 +623,7 @@ def get_size(self, shape_info): return np.dtype(self.dtype).itemsize -theano.compile.ops.expandable_types += (TensorType,) +aesara.compile.ops.expandable_types += (TensorType,) def values_eq_approx( @@ -652,7 +652,7 @@ def values_eq_approx( if str(a.dtype) not in continuous_dtypes: return np.all(a == b) else: - cmp = theano.tensor.math._allclose(a, b, rtol=rtol, atol=atol) + cmp = aesara.tensor.math._allclose(a, b, rtol=rtol, atol=atol) if cmp: # Numpy claims they are close, this is good enough for us. 
return True @@ -719,7 +719,7 @@ def values_eq_approx_always_true(a, b): return True -theano.compile.register_view_op_c_code( +aesara.compile.register_view_op_c_code( TensorType, """ Py_XDECREF(%(oname)s); @@ -730,7 +730,7 @@ def values_eq_approx_always_true(a, b): ) -theano.compile.register_deep_copy_op_c_code( +aesara.compile.register_deep_copy_op_c_code( TensorType, """ int alloc = %(oname)s == NULL; @@ -783,7 +783,7 @@ def scalar(name=None, dtype=None): Parameters ---------- dtype: numeric - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -821,7 +821,7 @@ def vector(name=None, dtype=None): Parameters ---------- dtype: numeric - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable @@ -856,7 +856,7 @@ def matrix(name=None, dtype=None): Parameters ---------- dtype: numeric - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -891,7 +891,7 @@ def row(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -920,7 +920,7 @@ def col(name=None, dtype=None): Parameters ---------- dtype : numeric - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -949,7 +949,7 @@ def tensor3(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -980,7 +980,7 @@ def tensor4(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -1011,7 +1011,7 @@ def tensor5(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -1042,7 +1042,7 @@ def tensor6(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. @@ -1073,7 +1073,7 @@ def tensor7(name=None, dtype=None): Parameters ---------- dtype: numeric type - None means to use theano.config.floatX. + None means to use aesara.config.floatX. name A name to attach to this variable. 
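The constructors documented above (``scalar`` through ``tensor7``) differ only in the rank of
the variable they create; ``dtype=None`` always falls back to ``aesara.config.floatX``. A short
usage sketch, assuming the ``aesara.tensor.type`` module shown in this diff:

.. code:: python

    from aesara.tensor.type import matrix, scalar, tensor3

    x = matrix("x")                 # rank 2, dtype defaults to aesara.config.floatX
    s = scalar("s", dtype="int64")  # an explicit dtype overrides floatX
    z = tensor3("z")                # rank 3, floatX dtype
    print(x.type, s.type, z.type)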
diff --git a/theano/tensor/type_other.py b/aesara/tensor/type_other.py similarity index 91% rename from theano/tensor/type_other.py rename to aesara/tensor/type_other.py index 65b37a173d..f643a57a6c 100644 --- a/theano/tensor/type_other.py +++ b/aesara/tensor/type_other.py @@ -4,12 +4,12 @@ import numpy as np -import theano -from theano.gradient import DisconnectedType -from theano.graph.basic import Apply, Constant -from theano.graph.op import Op -from theano.graph.type import Generic, Type -from theano.tensor.type import integer_dtypes +import aesara +from aesara.gradient import DisconnectedType +from aesara.graph.basic import Apply, Constant +from aesara.graph.op import Op +from aesara.graph.type import Generic, Type +from aesara.tensor.type import integer_dtypes def as_int_none_variable(x): @@ -17,7 +17,7 @@ def as_int_none_variable(x): return NoneConst elif NoneConst.equals(x): return x - x = theano.tensor.as_tensor_variable(x, ndim=0) + x = aesara.tensor.as_tensor_variable(x, ndim=0) if x.type.dtype not in integer_dtypes: raise TypeError("index must be integers") return x @@ -130,7 +130,7 @@ def may_share_memory(a, b): none_type_t = NoneTypeT() # This is a variable instance. It can be used only once per fgraph. -# So use NoneConst.clone() before using it in a Theano graph. +# So use NoneConst.clone() before using it in an Aesara graph. # Use NoneConst.equals(x) to check if two variable are NoneConst. NoneConst = Constant(none_type_t, None, name="NoneConst") diff --git a/theano/tensor/utils.py b/aesara/tensor/utils.py similarity index 92% rename from theano/tensor/utils.py rename to aesara/tensor/utils.py index b2cee05914..18010afe5f 100644 --- a/theano/tensor/utils.py +++ b/aesara/tensor/utils.py @@ -1,7 +1,7 @@ import numpy as np -import theano -from theano.utils import hash_from_code +import aesara +from aesara.utils import hash_from_code def hash_from_ndarray(data): @@ -51,8 +51,8 @@ def shape_of_variables(fgraph, input_shapes): Examples -------- - >>> import theano - >>> x = theano.tensor.matrix('x') + >>> import aesara + >>> x = aesara.tensor.matrix('x') >>> y = x[512:]; y.name = 'y' >>> fgraph = FunctionGraph([x], [y], clone=False) >>> d = shape_of_variables(fgraph, {x: (1024, 1024)}) @@ -63,7 +63,7 @@ def shape_of_variables(fgraph, input_shapes): """ if not hasattr(fgraph, "shape_feature"): - fgraph.attach_feature(theano.tensor.basic_opt.ShapeFeature()) + fgraph.attach_feature(aesara.tensor.basic_opt.ShapeFeature()) input_dims = [ dimension @@ -77,7 +77,7 @@ def shape_of_variables(fgraph, input_shapes): for dimension in shape ] - compute_shapes = theano.function(input_dims, output_dims) + compute_shapes = aesara.function(input_dims, output_dims) if any([i not in fgraph.inputs for i in input_shapes.keys()]): raise ValueError( diff --git a/theano/tensor/var.py b/aesara/tensor/var.py similarity index 93% rename from theano/tensor/var.py rename to aesara/tensor/var.py index ef56678d44..302509815e 100644 --- a/theano/tensor/var.py +++ b/aesara/tensor/var.py @@ -5,13 +5,13 @@ import numpy as np -from theano import tensor as tt -from theano.configdefaults import config -from theano.graph.basic import Constant, Variable -from theano.scalar import ComplexError, IntegerDivisionError -from theano.tensor.exceptions import AdvancedIndexingError -from theano.tensor.type import TensorType -from theano.tensor.utils import hash_from_ndarray +from aesara import tensor as tt +from aesara.configdefaults import config +from aesara.graph.basic import Constant, Variable +from aesara.scalar import 
ComplexError, IntegerDivisionError +from aesara.tensor.exceptions import AdvancedIndexingError +from aesara.tensor.type import TensorType +from aesara.tensor.utils import hash_from_ndarray class _tensor_py_operators: @@ -101,7 +101,7 @@ def __add__(self, other): try: return tt.math.add(self, other) # We should catch the minimum number of exception here. - # Otherwise this will convert error when Theano flags + # Otherwise this will convert error when Aesara flags # compute_test_value is used # Evidently, we need to catch NotImplementedError # TypeError from as_tensor_variable are caught in Elemwise.make_node @@ -277,9 +277,9 @@ def all(self, axis=None, keepdims=False): # TODO: We could use `get_vector_length` and let it raise an exception just like # `__iter__` does # def __len__(self): - # raise Exception("Theano Variables can't work with len(Theano " + # raise Exception("Aesara Variables can't work with len(Aesara " # "Variable) due to Python restriction. You can use " - # "TheanoVariable.shape[0] instead.") + # "AesaraVariable.shape[0] instead.") def reshape(self, shape, ndim=None): """Return a reshaped view/copy of this variable. @@ -290,13 +290,13 @@ def reshape(self, shape, ndim=None): Something that can be converted to a symbolic vector of integers. ndim The length of the shape. Passing None here means for - Theano to try and guess the length of `shape`. + Aesara to try and guess the length of `shape`. .. warning:: This has a different signature than numpy's ndarray.reshape! In numpy you do not need to wrap the shape arguments - in a tuple, in theano you do need to. + in a tuple, in aesara you do need to. """ if ndim is not None: @@ -478,7 +478,7 @@ def includes_bool(args_el): # Python arrays can contain a mixture of bools and integers, # which requires complex rules to handle all special cases. # These rules differ slightly between NumPy versions. - # Since earlier versions of Theano did not support any boolean + # Since earlier versions of Aesara did not support any boolean # indexing, it is safe to throw an error if we encounter # any of these difficult cases. if includes_bool(arg): @@ -513,7 +513,7 @@ def is_empty_array(val): # Force input to be int64 datatype if input is an empty list or tuple # Else leave it as is if it is a real number - # Convert python literals to theano constants + # Convert python literals to aesara constants args = tuple( [ tt.subtensor.as_index_constant( @@ -580,7 +580,7 @@ def is_empty_array(val): else: if np.newaxis in args: # `np.newaxis` (i.e. `None`) in NumPy indexing mean "add a new - # broadcastable dimension at this location". Since Theano adds + # broadcastable dimension at this location". Since Aesara adds # new broadcastable dimensions via the `DimShuffle` `Op`, the # following code uses said `Op` to add one of the new axes and # then uses recursion to apply any other indices and add any @@ -643,7 +643,7 @@ def __iter__(self): raise TypeError( "TensorType does not support iteration. " "Maybe you are using builtins.sum instead of " - "theano.tensor.math.sum? (Maybe .max?)" + "aesara.tensor.math.sum? 
(Maybe .max?)" ) @property @@ -677,13 +677,13 @@ def __rdot__(right, left): dot = __dot__ def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `theano.tensor.math.sum`.""" + """See `aesara.tensor.math.sum`.""" return tt.math.sum( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `theano.tensor.math.prod`.""" + """See `aesara.tensor.math.prod`.""" return tt.math.prod( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) @@ -704,79 +704,79 @@ def norm(self, L, axis=None, keepdims=False): return y def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): - """See `theano.tensor.math.mean`.""" + """See `aesara.tensor.math.mean`.""" return tt.math.mean( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def var(self, axis=None, ddof=0, keepdims=False, corrected=False): - """See `theano.tensor.math.var`.""" + """See `aesara.tensor.math.var`.""" return tt.math.var( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def std(self, axis=None, ddof=0, keepdims=False, corrected=False): - """See `theano.tensor.math.std`.""" + """See `aesara.tensor.math.std`.""" return tt.math.std( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def min(self, axis=None, keepdims=False): - """See `theano.tensor.math.min`.""" + """See `aesara.tensor.math.min`.""" return tt.math.min(self, axis, keepdims=keepdims) def max(self, axis=None, keepdims=False): - """See `theano.tensor.math.max`.""" + """See `aesara.tensor.math.max`.""" return tt.math.max(self, axis, keepdims=keepdims) def argmin(self, axis=None, keepdims=False): - """See `theano.tensor.math.argmin`.""" + """See `aesara.tensor.math.argmin`.""" return tt.math.argmin(self, axis, keepdims=keepdims) def argmax(self, axis=None, keepdims=False): - """See `theano.tensor.math.argmax`.""" + """See `aesara.tensor.math.argmax`.""" return tt.math.argmax(self, axis, keepdims=keepdims) def nonzero(self, return_matrix=False): - """See `theano.tensor.basic.nonzero`.""" + """See `aesara.tensor.basic.nonzero`.""" return tt.nonzero(self, return_matrix=return_matrix) def nonzero_values(self): - """See `theano.tensor.basic.nonzero_values`.""" + """See `aesara.tensor.basic.nonzero_values`.""" return tt.nonzero_values(self) def sort(self, axis=-1, kind="quicksort", order=None): - """See `theano.tensor.sort.sort`.""" + """See `aesara.tensor.sort.sort`.""" return tt.sort(self, axis, kind, order) def argsort(self, axis=-1, kind="quicksort", order=None): - """See `theano.tensor.sort.argsort`.""" - from theano.tensor.sort import argsort + """See `aesara.tensor.sort.argsort`.""" + from aesara.tensor.sort import argsort return argsort(self, axis, kind, order) def clip(self, a_min, a_max): - "See `theano.tensor.math.clip`." + "See `aesara.tensor.math.clip`." return tt.math.clip(self, a_min, a_max) def conj(self): - """See `theano.tensor.math.conj`.""" + """See `aesara.tensor.math.conj`.""" return tt.math.conj(self) conjugate = conj def repeat(self, repeats, axis=None): - """See `theano.tensor.basic.repeat`.""" + """See `aesara.tensor.basic.repeat`.""" return tt.extra_ops.repeat(self, repeats, axis) def round(self, mode=None): - """See `theano.tensor.math.round`.""" + """See `aesara.tensor.math.round`.""" return tt.math.round(self, mode) def trace(self): return tt.nlinalg.trace(self) - # This value is set so that Theano arrays will trump NumPy operators. 
+ # This value is set so that Aesara arrays will trump NumPy operators. __array_priority__ = 1000 def get_scalar_constant_value(self): @@ -798,12 +798,12 @@ def searchsorted(self, v, side="left", sorter=None): return tt.extra_ops.searchsorted(self, v, side, sorter) def ptp(self, axis=None): - """See `theano.tensor.math.ptp`.""" + """See `aesara.tensor.math.ptp`.""" return tt.math.ptp(self, axis) def swapaxes(self, axis1, axis2): - """See `theano.tensor.basic.swapaxes`. + """See `aesara.tensor.basic.swapaxes`. If a matrix is provided with the right axes, its transpose will be returned. @@ -850,7 +850,7 @@ def __init__(self, type, owner=None, index=None, name=None): msg = ( "You are creating a TensorVariable " "with float64 dtype. You requested an action via " - "the Theano flag warn_float64={ignore,warn,raise,pdb}." + "the Aesara flag warn_float64={ignore,warn,raise,pdb}." ) if config.warn_float64 == "warn": # Get the user stack. We don't want function inside the @@ -861,10 +861,10 @@ def __init__(self, type, owner=None, index=None, name=None): file_path = x[-1][0] rm = False for p in [ - "theano/tensor/", - "theano\\tensor\\", - "theano/graph/", - "theano\\tensor\\", + "aesara/tensor/", + "aesara\\tensor\\", + "aesara/graph/", + "aesara\\tensor\\", ]: if p in file_path: x = x[:-1] @@ -934,7 +934,7 @@ def __hash__(self): t, d = self return hash((type(self), t, d.shape, self.sum)) - def theano_hash(self): + def aesara_hash(self): _, d = self return hash_from_ndarray(d) diff --git a/theano/tensor/xlogx.py b/aesara/tensor/xlogx.py similarity index 95% rename from theano/tensor/xlogx.py rename to aesara/tensor/xlogx.py index 61083b3d79..1af7ebd88b 100644 --- a/theano/tensor/xlogx.py +++ b/aesara/tensor/xlogx.py @@ -1,7 +1,7 @@ import numpy as np -from theano import scalar as ts -from theano.tensor.elemwise import Elemwise +from aesara import scalar as ts +from aesara.tensor.elemwise import Elemwise class XlogX(ts.UnaryScalarOp): diff --git a/theano/typed_list/__init__.py b/aesara/typed_list/__init__.py similarity index 100% rename from theano/typed_list/__init__.py rename to aesara/typed_list/__init__.py diff --git a/theano/typed_list/basic.py b/aesara/typed_list/basic.py similarity index 97% rename from theano/typed_list/basic.py rename to aesara/typed_list/basic.py index 471e848e80..cf640d6622 100644 --- a/theano/typed_list/basic.py +++ b/aesara/typed_list/basic.py @@ -1,14 +1,14 @@ import numpy as np -import theano.tensor as tt -from theano.compile.debugmode import _lessbroken_deepcopy -from theano.configdefaults import config -from theano.graph.basic import Apply, Constant, Variable -from theano.graph.op import COp, Op -from theano.tensor.type import scalar -from theano.tensor.type_other import SliceType -from theano.tensor.var import TensorVariable -from theano.typed_list.type import TypedListType +import aesara.tensor as tt +from aesara.compile.debugmode import _lessbroken_deepcopy +from aesara.configdefaults import config +from aesara.graph.basic import Apply, Constant, Variable +from aesara.graph.op import COp, Op +from aesara.tensor.type import scalar +from aesara.tensor.type_other import SliceType +from aesara.tensor.var import TensorVariable +from aesara.typed_list.type import TypedListType class _typed_list_py_operators: @@ -665,14 +665,14 @@ def perform(self, node, inputs, outputs): make_list = MakeList() """ -Build a Python list from those Theano variable. +Build a Python list from those Aesara variable. 
Parameters ---------- -a : tuple/list of Theano variable +a : tuple/list of Aesara variable Notes ----- -All Theano variables must have the same type. +All Aesara variables must have the same type. """ diff --git a/theano/typed_list/opt.py b/aesara/typed_list/opt.py similarity index 78% rename from theano/typed_list/opt.py rename to aesara/typed_list/opt.py index d7a37505a3..cb519222f9 100644 --- a/theano/typed_list/opt.py +++ b/aesara/typed_list/opt.py @@ -1,6 +1,6 @@ -from theano.compile import optdb -from theano.graph.opt import TopoOptimizer, local_optimizer -from theano.typed_list.basic import Append, Extend, Insert, Remove, Reverse +from aesara.compile import optdb +from aesara.graph.opt import TopoOptimizer, local_optimizer +from aesara.typed_list.basic import Append, Extend, Insert, Remove, Reverse @local_optimizer([Append, Extend, Insert, Reverse, Remove], inplace=True) diff --git a/theano/typed_list/type.py b/aesara/typed_list/type.py similarity index 95% rename from theano/typed_list/type.py rename to aesara/typed_list/type.py index 5ae92d3a6e..b056825baf 100644 --- a/theano/typed_list/type.py +++ b/aesara/typed_list/type.py @@ -1,4 +1,4 @@ -from theano.graph.type import CType, Type +from aesara.graph.type import CType, Type class TypedListType(CType): @@ -7,7 +7,7 @@ class TypedListType(CType): Parameters ---------- ttype - Type of theano variable this list will contains, can be another list. + Type of aesara variable this list will contains, can be another list. depth Optionnal parameters, any value above 0 will create a nested list of this depth. (0-based) @@ -19,7 +19,7 @@ def __init__(self, ttype, depth=0): if depth < 0: raise ValueError("Please specify a depth superior or" "equal to 0") if not isinstance(ttype, Type): - raise TypeError("Expected a Theano Type") + raise TypeError("Expected an Aesara Type") if depth == 0: self.ttype = ttype diff --git a/theano/updates.py b/aesara/updates.py similarity index 89% rename from theano/updates.py rename to aesara/updates.py index 82c181fc4d..98dd0b173c 100644 --- a/theano/updates.py +++ b/aesara/updates.py @@ -5,17 +5,12 @@ import warnings from collections import OrderedDict -from theano.compile.sharedvalue import SharedVariable +from aesara.compile.sharedvalue import SharedVariable -__authors__ = "theano-dev" -__copyright__ = "(c) 2010, Universite de Montreal" -__license__ = "3-clause BSD License" -__contact__ = "theano-dev " - __docformat__ = "restructuredtext en" -logger = logging.getLogger("theano.updates") +logger = logging.getLogger("aesara.updates") # Must be an OrderedDict or updates will be applied in a non-deterministic @@ -53,7 +48,7 @@ def __setitem__(self, key, value): if isinstance(key, SharedVariable): # TODO: consider doing error-checking on value. - # insist that it is a Theano variable? Have the right type? + # insist that it is an Aesara variable? Have the right type? # This could have weird consequences - for example a # GPU SharedVariable is customarily associated with a TensorType # value. Should it be cast to a GPU value right away? 
Should diff --git a/theano/utils.py b/aesara/utils.py similarity index 100% rename from theano/utils.py rename to aesara/utils.py diff --git a/theano/version.py b/aesara/version.py similarity index 92% rename from theano/version.py rename to aesara/version.py index 0b3df9fa0b..0f0e7f9e24 100644 --- a/theano/version.py +++ b/aesara/version.py @@ -1,4 +1,4 @@ -from theano._version import get_versions +from aesara._version import get_versions FALLBACK_VERSION = "1.0.5+unknown" diff --git a/bin/aesara-cache b/bin/aesara-cache new file mode 100755 index 0000000000..59db229ca6 --- /dev/null +++ b/bin/aesara-cache @@ -0,0 +1,3 @@ +#!/usr/bin/env python +import aesara_cache +aesara_cache.main() diff --git a/bin/theano_cache.py b/bin/aesara_cache.py similarity index 62% rename from bin/theano_cache.py rename to bin/aesara_cache.py index 0aac8a85d6..4e6e28bc49 100644 --- a/bin/theano_cache.py +++ b/bin/aesara_cache.py @@ -6,44 +6,44 @@ if sys.platform == "win32": - config_for_theano_cache_script = "cxx=,device=cpu" - theano_flags = os.environ["THEANO_FLAGS"] if "THEANO_FLAGS" in os.environ else "" - if theano_flags: - theano_flags += "," - theano_flags += config_for_theano_cache_script - os.environ["THEANO_FLAGS"] = theano_flags + config_for_aesara_cache_script = "cxx=,device=cpu" + aesara_flags = os.environ["AESARA_FLAGS"] if "AESARA_FLAGS" in os.environ else "" + if aesara_flags: + aesara_flags += "," + aesara_flags += config_for_aesara_cache_script + os.environ["AESARA_FLAGS"] = aesara_flags -import theano -import theano.compile.compiledir -from theano import config -from theano.link.c.basic import get_module_cache +import aesara +import aesara.compile.compiledir +from aesara import config +from aesara.link.c.basic import get_module_cache -_logger = logging.getLogger("theano.bin.theano-cache") +_logger = logging.getLogger("aesara.bin.aesara-cache") def print_help(exit_status): if exit_status: print(f"command \"{' '.join(sys.argv)}\" not recognized") - print('Type "theano-cache" to print the cache location') - print('Type "theano-cache help" to print this help') - print('Type "theano-cache clear" to erase the cache') - print('Type "theano-cache list" to print the cache content') - print('Type "theano-cache unlock" to unlock the cache directory') + print('Type "aesara-cache" to print the cache location') + print('Type "aesara-cache help" to print this help') + print('Type "aesara-cache clear" to erase the cache') + print('Type "aesara-cache list" to print the cache content') + print('Type "aesara-cache unlock" to unlock the cache directory') print( - 'Type "theano-cache cleanup" to delete keys in the old ' "format/code version" + 'Type "aesara-cache cleanup" to delete keys in the old ' "format/code version" ) - print('Type "theano-cache purge" to force deletion of the cache directory') + print('Type "aesara-cache purge" to force deletion of the cache directory') print( - 'Type "theano-cache basecompiledir" ' + 'Type "aesara-cache basecompiledir" ' "to print the parent of the cache directory" ) print( - 'Type "theano-cache basecompiledir list" ' + 'Type "aesara-cache basecompiledir list" ' "to print the content of the base compile dir" ) print( - 'Type "theano-cache basecompiledir purge" ' + 'Type "aesara-cache basecompiledir purge" ' "to remove everything in the base compile dir, " "that is, erase ALL cache directories" ) @@ -66,7 +66,7 @@ def main(): # Print a warning if some cached modules were not removed, so that the # user knows he should manually delete them, or call - # theano-cache purge, # 
to properly clear the cache. + # aesara-cache purge, # to properly clear the cache. items = [ item for item in sorted(os.listdir(cache.dirname)) @@ -76,31 +76,31 @@ def main(): _logger.warning( "There remain elements in the cache dir that you may " "need to erase manually. The cache dir is:\n %s\n" - 'You can also call "theano-cache purge" to ' + 'You can also call "aesara-cache purge" to ' "remove everything from that directory." % config.compiledir ) _logger.debug(f"Remaining elements ({len(items)}): {', '.join(items)}") elif sys.argv[1] == "list": - theano.compile.compiledir.print_compiledir_content() + aesara.compile.compiledir.print_compiledir_content() elif sys.argv[1] == "cleanup": - theano.compile.compiledir.cleanup() + aesara.compile.compiledir.cleanup() cache = get_module_cache(init_args=dict(do_refresh=False)) cache.clear_old() elif sys.argv[1] == "unlock": - theano.compile.compilelock.force_unlock(config.compiledir) + aesara.compile.compilelock.force_unlock(config.compiledir) print("Lock successfully removed!") elif sys.argv[1] == "purge": - theano.compile.compiledir.compiledir_purge() + aesara.compile.compiledir.compiledir_purge() elif sys.argv[1] == "basecompiledir": # Simply print the base_compiledir - print(theano.config.base_compiledir) + print(aesara.config.base_compiledir) else: print_help(exit_status=1) elif len(sys.argv) == 3 and sys.argv[1] == "basecompiledir": if sys.argv[2] == "list": - theano.compile.compiledir.basecompiledir_ls() + aesara.compile.compiledir.basecompiledir_ls() elif sys.argv[2] == "purge": - theano.compile.compiledir.basecompiledir_purge() + aesara.compile.compiledir.basecompiledir_purge() else: print_help(exit_status=1) else: diff --git a/bin/theano-cache b/bin/theano-cache deleted file mode 100755 index d20362b442..0000000000 --- a/bin/theano-cache +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python -import theano_cache -theano_cache.main() diff --git a/conda/meta.yaml b/conda/meta.yaml index 5a7e21c249..424fc33aa7 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: - name: theano - version: {{ environ.get('THEANO_VERSION') }} + name: aesara + version: {{ environ.get('AESARA_VERSION') }} source: path: ../ @@ -25,13 +25,13 @@ requirements: test: imports: - - theano + - aesara about: - home: http://deeplearning.net/software/theano/ + home: https://github.com/pymc-devs/aesara/ license: BSD 3-Clause license_family: BSD summary: Optimizing compiler for evaluating mathematical expressions on CPUs and GPUs. - summary: Fork of Theano for PyMC3. An optimizing compiler for evaluating mathematical expressions on CPUs and GPUs. - dev_url: https://github.com/pymc-devs/Theano-PyMC/ - doc_url: http://deeplearning.net/software/theano/ + summary: Fork of Aesara for PyMC3. An optimizing compiler for evaluating mathematical expressions on CPUs and GPUs. 
+ dev_url: https://github.com/pymc-devs/aesara/ + doc_url: https://aesara.readthedocs.io/en/latest/index.html diff --git a/conftest.py b/conftest.py index 688c259b1c..8971c59919 100644 --- a/conftest.py +++ b/conftest.py @@ -4,9 +4,9 @@ def pytest_sessionstart(session): - os.environ["THEANO_FLAGS"] = ",".join( + os.environ["AESARA_FLAGS"] = ",".join( [ - os.environ.setdefault("THEANO_FLAGS", ""), + os.environ.setdefault("AESARA_FLAGS", ""), "warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise", ] ) diff --git a/doc/.static/version_switch.js b/doc/.static/version_switch.js index 1265d9cc6d..10136107a2 100644 --- a/doc/.static/version_switch.js +++ b/doc/.static/version_switch.js @@ -2,36 +2,36 @@ (function() { var url = window.location.href; - var theano_dir = 'theano'; // directory containing theano doc - // Default theano version: release and development. - var versions_dir = {"release": "theano", "dev": "theano_versions/dev"}; + var aesara_dir = 'aesara'; // directory containing aesara doc + // Default aesara version: release and development. + var versions_dir = {"release": "aesara", "dev": "aesara_versions/dev"}; // If doc is run localy if (url.startsWith('file')) { - theano_dir = 'html'; + aesara_dir = 'html'; versions_dir = {"local":"html", "test":"test"}; } - var root_url = url.substring(0, url.search('/' + theano_dir)) + '/'; + var root_url = url.substring(0, url.search('/' + aesara_dir)) + '/'; - // Regular expression to find theano version directory in URL. - var version_regex = new RegExp("\\/" + theano_dir + "(_versions\\/)?([_a-zA-Z.0-9]*)\\/"); + // Regular expression to find aesara version directory in URL. + var version_regex = new RegExp("\\/" + aesara_dir + "(_versions\\/)?([_a-zA-Z.0-9]*)\\/"); // Get current version var current_version = url.match(version_regex)[0] current_version = current_version.substring(1, current_version.length - 1) // Add current version in case versions.json is unavailable - if (current_version != "theano" && current_version != "html") { - ver = current_version.replace("theano_versions/", "") + if (current_version != "aesara" && current_version != "html") { + ver = current_version.replace("aesara_versions/", "") versions_dir[ver] = current_version } function build_vswitch() { // Build HTML string for version selector, based on ReadTheDocs theme's versions.html - var vlabel = current_version.replace("theano_versions/", ""); - if (vlabel == 'theano') { + var vlabel = current_version.replace("aesara_versions/", ""); + if (vlabel == 'aesara') { vlabel = 'release'; } var vswitch = ['
']; @@ -51,15 +51,9 @@ } vswitch.push(''); - vswitch.push('
'); - vswitch.push('
Downloads
'); - var pdf_url = root_url + current_version + "/theano.pdf" - vswitch.push('
', 'PDF', '
'); - vswitch.push('
'); - vswitch.push('
'); vswitch.push('
On GitHub
'); - var git_master = "https://github.com/Theano/Theano" + var git_master = "https://github.com/pymc-devs/aesara" vswitch.push('
', 'Fork me', '
'); vswitch.push('
'); @@ -71,8 +65,8 @@ function build_vswitch_up() { // Build HTML string for version selector, based on ReadTheDocs theme's versions.html - var vlabel = current_version.replace("theano_versions/", ""); - if (vlabel == 'theano') { + var vlabel = current_version.replace("aesara_versions/", ""); + if (vlabel == 'aesara') { vlabel = 'release'; } else if (vlabel != "dev") { @@ -95,7 +89,7 @@ // Check server for other doc versions and update switcher. if (url.startsWith('http')) { - $.getJSON(root_url + 'theano_versions/versions.json', function(data){ + $.getJSON(root_url + 'aesara_versions/versions.json', function(data){ $.each(data, function(version, dir) { versions_dir[version] = dir; }); diff --git a/doc/JaxOps.rst b/doc/JaxOps.rst index 1905ae5f1a..67b9210fef 100644 --- a/doc/JaxOps.rst +++ b/doc/JaxOps.rst @@ -1,7 +1,7 @@ Tutorial on adding JAX Ops to Aesara ============================= -A core feature of Aesara, previously named Theano-PyMC, is the JAX +A core feature of Aesara, previously named Aesara, is the JAX backend. To support the backend JAX ops need be added to Aesara once to be supported. This tutorial will explain each step. @@ -14,8 +14,8 @@ to know what we want JAX to do. | Here are the examples for ``eye`` and ``ifelse`` from Aesara from the compiled doc and codebase respectively -| https://theano-pymc.readthedocs.io/en/latest/library/tensor/basic.html?highlight=eye#theano.tensor.eye -| https://github.com/pymc-devs/Theano-PyMC/blob/master/theano/ifelse.py#L35 +| https://aesara.readthedocs.io/en/latest/library/tensor/basic.html?highlight=eye#aesara.tensor.eye +| https://github.com/pymc-devs/aesara/blob/master/aesara/ifelse.py#L35 Step 2: Find the relevant JAX method (or something close) ========================================================= @@ -39,7 +39,7 @@ logic. return res if n_outs > 1 else res[0] *Code in context:* -https://github.com/pymc-devs/Theano-PyMC/blob/master/theano/link/jax/jax_dispatch.py#L583 +https://github.com/pymc-devs/aesara/blob/master/aesara/link/jax/jax_dispatch.py#L583 Step 3: Register the function with the jax_funcify dispatcher ============================================================= @@ -51,13 +51,13 @@ short tutorial on dispatching is at the bottom. The linker functions should be added to ``jax_dispatch`` module linked below. -https://github.com/pymc-devs/Theano-PyMC/blob/master/theano/link/jax/jax_dispatch.py +https://github.com/pymc-devs/aesara/blob/master/aesara/link/jax/jax_dispatch.py Here’s an example for the Eye Op. .. code:: python - from theano.tensor.basic import Eye + from aesara.tensor.basic import Eye @jax_funcify.register(Eye) # The decorator def jax_funcify_Eye(op): # The function that takes an Op and returns its JAX equivalent @@ -69,7 +69,7 @@ Here’s an example for the Eye Op. return eye *Code in context:* -https://github.com/pymc-devs/Theano-PyMC/blob/master/theano/link/jax/jax_dispatch.py#L1071 +https://github.com/pymc-devs/aesara/blob/master/aesara/link/jax/jax_dispatch.py#L1071 Step 4: Write tests =================== @@ -82,19 +82,19 @@ compile the same function graph in Python and JAX and check that the numerical output is similar betwen the JAX and Python output, as well object types to ensure correct compilation. -https://github.com/pymc-devs/Theano-PyMC/blob/master/tests/link/test_jax.py +https://github.com/pymc-devs/aesara/blob/master/tests/link/test_jax.py .. 
code:: python def test_jax_eye(): """Tests jaxification of the Eye operator""" - out = tt.eye(3) # Initialize a Theano Op - out_fg = theano.graph.fg.FunctionGraph([], [out]) # Create a Theano FunctionGraph + out = tt.eye(3) # Initialize an Aesara Op + out_fg = aesara.graph.fg.FunctionGraph([], [out]) # Create an Aesara FunctionGraph compare_jax_and_py(out_fg, []) # Pas the graph and any inputs to testing function *Code in context:* -https://github.com/pymc-devs/Theano-PyMC/blob/056fcee1434818d0aed9234e01c754ed88d0f27a/tests/link/test_jax.py#L250 +https://github.com/pymc-devs/aesara/blob/056fcee1434818d0aed9234e01c754ed88d0f27a/tests/link/test_jax.py#L250 Step 5: Wait for CI pass and Code Review ======================================== @@ -102,7 +102,7 @@ Step 5: Wait for CI pass and Code Review Create a pull request and ensure CI passes. If it does wait for a code review and a likely merge! -https://github.com/pymc-devs/Theano-PyMC/pulls +https://github.com/pymc-devs/aesara/pulls Appendix: What does singledispatcher do? ======================================== diff --git a/doc/LICENSE.txt b/doc/LICENSE.txt index 382266964e..fea6e2d6d1 100644 --- a/doc/LICENSE.txt +++ b/doc/LICENSE.txt @@ -4,13 +4,17 @@ LICENSE ======= Copyright (c) 2008--2019, Theano Development Team -Copyright (c) 2020, PyMC dev team +Copyright (c) 2020-2021, PyMC dev team All rights reserved. Contains code from NumPy, Copyright (c) 2005-2016, NumPy Developers. All rights reserved. -Contains frozendict code from slezica’s python-frozendict(https://github.com/slezica/python-frozendict/blob/master/frozendict/__init__.py), Copyright (c) 2012 Santiago Lezica. All rights reserved. +theano/tensor/sharedvar.py: James Bergstra, (c) 2010, Universite de Montreal, 3-clause BSD License +theano/gradient.py: James Bergstra, Razvan Pascanu, Arnaud Bergeron, Ian Goodfellow, PyMC Developers, (c) 2011, Universite de Montreal, 3-clause BSD License +theano/compile/monitormode.py: this code was initially copied from the 'pyutools' package by its original author, and re-licensed under Theano's license. + +Contains frozendict code from slezica’s python-frozendict(https://github.com/slezica/python-frozendict/blob/master/frozendict/__init__.py), Copyright (c) 2012 Santiago Lezica. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -20,7 +24,7 @@ modification, are permitted provided that the following conditions are met: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Theano nor the names of its contributors may be + * Neither the name of Aesara, Theano, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/doc/acknowledgement.txt b/doc/acknowledgement.txt index 60b7e4401c..383a3d7c25 100644 --- a/doc/acknowledgement.txt +++ b/doc/acknowledgement.txt @@ -11,7 +11,8 @@ Acknowledgements * The developers of `NumPy `_. Theano is based on its ndarray object and uses much of its implementation. * The developers of `SciPy `_. Our sparse matrix support uses their sparse matrix objects. We also reuse other parts. -* All `Theano contributors `_. +* The developers of `Theano `_ +* All `Aesara contributors `_. * All Theano users that have given us feedback. 
* The GPU implementation of tensordot is based on code from Tijmen Tieleman's `gnumpy `_ diff --git a/doc/theano_installer_for_anaconda.bat b/doc/aesara_installer_for_anaconda.bat similarity index 58% rename from doc/theano_installer_for_anaconda.bat rename to doc/aesara_installer_for_anaconda.bat index 53c1956498..d8e2a22cc9 100644 --- a/doc/theano_installer_for_anaconda.bat +++ b/doc/aesara_installer_for_anaconda.bat @@ -19,24 +19,24 @@ echo copying dlls from %ANACONDA_DIR%\MinGW\x86_64-w64-mingw32\lib to %ANACONDA_ copy %ANACONDA_DIR%\MinGW\x86_64-w64-mingw32\lib\*.dll %ANACONDA_DIR% echo done -echo Trying to install theano -pip install Theano +echo Trying to install aesara +pip install Aesara echo installed -rem Put a default .theanorc.txt -set THEANORC=%USERPROFILE%\.theanorc.txt -set THEANORC_=%USERPROFILE%\.theanorc_install.txt -echo [global]> %THEANORC_% -echo openmp=False>> %THEANORC_% -echo.>> %THEANORC_% -echo [blas]>> %THEANORC_% -echo ldflags=>> %THEANORC_% - -if exist %THEANORC% ( - echo A .theanorc.txt config file already exists, so we will not change it. - echo The default version is in %THEANORC_%, we suggest you check it out. +rem Put a default .aesararc.txt +set AESARARC=%USERPROFILE%\.aesararc.txt +set AESARARC_=%USERPROFILE%\.aesararc_install.txt +echo [global]> %AESARARC_% +echo openmp=False>> %AESARARC_% +echo.>> %AESARARC_% +echo [blas]>> %AESARARC_% +echo ldflags=>> %AESARARC_% + +if exist %AESARARC% ( + echo A .aesararc.txt config file already exists, so we will not change it. + echo The default version is in %AESARARC_%, we suggest you check it out. ) else ( - rename %THEANORC_% .theanorc.txt + rename %AESARARC_% .aesararc.txt ) :end diff --git a/doc/cifarSC2011/advanced_theano.txt b/doc/cifarSC2011/advanced_theano.txt deleted file mode 100644 index 9eafe0b14e..0000000000 --- a/doc/cifarSC2011/advanced_theano.txt +++ /dev/null @@ -1,534 +0,0 @@ - -.. _advanced_theano: - -*************** -Advanced Theano -*************** - -Conditions ----------- -**IfElse** - -- Build condition over symbolic variables. -- IfElse Op takes a boolean condition and two variables to compute as input. -- While Switch Op evaluates both 'output' variables, IfElse Op is lazy and only - evaluates one variable respect to the condition. - -**IfElse Example: Comparison with Switch** - -.. testcode:: - - from theano import tensor as tt - from theano.ifelse import ifelse - import theano, time, numpy - - a,b = tt.scalars('a','b') - x,y = tt.matrices('x','y') - - z_switch = tt.switch(tt.lt(a,b), tt.mean(x), tt.mean(y)) - z_lazy = ifelse(tt.lt(a,b), tt.mean(x), tt.mean(y)) - - f_switch = theano.function([a,b,x,y], z_switch, - mode=theano.compile.mode.Mode(linker='vm')) - f_lazyifelse = theano.function([a,b,x,y], z_lazy, - mode=theano.compile.mode.Mode(linker='vm')) - - val1 = 0. - val2 = 1. - big_mat1 = numpy.ones((10000,1000)) - big_mat2 = numpy.ones((10000,1000)) - - n_times = 10 - - tic = time.clock() - for i in range(n_times): - f_switch(val1, val2, big_mat1, big_mat2) - print('time spent evaluating both values %f sec' % (time.clock()-tic)) - - tic = time.clock() - for i in range(n_times): - f_lazyifelse(val1, val2, big_mat1, big_mat2) - print('time spent evaluating one value %f sec' % (time.clock()-tic)) - -.. testoutput:: - :hide: - :options: +ELLIPSIS - - time spent evaluating both values ... sec - time spent evaluating one value ... sec - -IfElse Op spend less time (about an half) than Switch since it computes only -one variable instead of both. - -.. 
code-block:: none - - $ python ifelse_switch.py - time spent evaluating both values 0.6700 sec - time spent evaluating one value 0.3500 sec - -Note that IfElse condition is a boolean while Switch condition is a tensor, so -Switch is more general. - -It is actually important to use ``linker='vm'`` or ``linker='cvm'``, -otherwise IfElse will compute both variables and take the same computation -time as the Switch Op. The linker is not currently set by default to 'cvm' but -it will be in a near future. - -Loops ------ - -**Scan** - -- General form of **recurrence**, which can be used for looping. -- **Reduction** and **map** (loop over the leading dimensions) are special cases of Scan -- You 'scan' a function along some input sequence, producing an output at each time-step -- The function can see the **previous K time-steps** of your function -- ``sum()`` could be computed by scanning the z + x(i) function over a list, given an initial state of ``z=0``. -- Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping. -- The advantage of using ``scan`` over for loops - - - The number of iterations to be part of the symbolic graph - - Minimizes GPU transfers if GPU is involved - - Compute gradients through sequential steps - - Slightly faster then using a for loop in Python with a compiled Theano function - - Can lower the overall memory usage by detecting the actual amount of memory needed - -**Scan Example: Computing pow(A,k)** - -.. code-block:: python - - import theano - import theano.tensor as tt - - k = tt.iscalar("k"); A = tt.vector("A") - - def inner_fct(prior_result, A): return prior_result * A - # Symbolic description of the result - result, updates = theano.scan(fn=inner_fct, - outputs_info=tt.ones_like(A), - non_sequences=A, n_steps=k) - - # Scan has provided us with A**1 through A**k. Keep only the last - # value. Scan notices this and does not waste memory saving them. - final_result = result[-1] - - power = theano.function(inputs=[A,k], outputs=final_result, - updates=updates) - - print power(range(10),2) - #[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.] - - -**Scan Example: Calculating a Polynomial** - -.. testcode:: - - import numpy - import theano - import theano.tensor as tt - - coefficients = theano.tensor.vector("coefficients") - x = tt.scalar("x"); max_coefficients_supported = 10000 - - # Generate the components of the polynomial - full_range=theano.tensor.arange(max_coefficients_supported) - components, updates = theano.scan(fn=lambda coeff, power, free_var: - coeff * (free_var ** power), - outputs_info=None, - sequences=[coefficients, full_range], - non_sequences=x) - polynomial = components.sum() - calculate_polynomial = theano.function(inputs=[coefficients, x], - outputs=polynomial) - - test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32) - print(calculate_polynomial(test_coeff, 3)) - -.. testoutput:: - - 19.0 - - - -Exercise 4 ------------ - -- Run both examples -- Modify and execute the polynomial example to have the reduction done by scan - - - -Compilation pipeline --------------------- - -.. image:: ../hpcs2011_tutorial/pics/pipeline.png - :width: 400 px - -Inplace optimization --------------------- - -- 2 type of inplace operations: - - - An op that return a view on its inputs (e.g. 
reshape, inplace transpose) - - An op that write the output on the inputs memory space - -- This allows some memory optimization -- The Op must tell Theano if they work inplace -- Inplace Op add constraints to the order of execution - - -Profiling ---------- - -- To replace the default mode with this mode, use the Theano flags ``profile=True`` - -- To enable the memory profiling use the flags ``profile=True,profile_memory=True`` - -Theano output for running the train function of logistic regression -example from :doc:`here <../tutorial/examples>` for one epoch: - -.. code-block:: python - - """ - Function profiling - ================== - Message: train.py:47 - Time in 1 calls to Function.__call__: 5.981922e-03s - Time in Function.fn.__call__: 5.180120e-03s (86.596%) - Time in thunks: 4.213095e-03s (70.430%) - Total compile time: 3.739440e-01s - Number of Apply nodes: 21 - Theano Optimizer time: 3.258998e-01s - Theano validate time: 5.632162e-03s - Theano Linker time (includes C, CUDA code generation/compiling): 3.185582e-02s - Import time 3.157377e-03s - - Time in all call to theano.grad() 2.997899e-02s - Time since theano import 3.616s - Class - --- - <% time>
+    <sudo> pip install <--user> <--no-deps> git+https://github.com/pymc-devs/aesara.git#egg=aesara
- Any argument between <...> is optional. - Use :red:`sudo` for a root installation. -- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages. +- Use :blue:`user` for a user installation without admin rights. It will install Aesara in your local site-packages. -- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages. +- Use :pink:`no-deps` when you don't want the dependencies of Aesara to be installed through pip. This is important when they have already been installed as system packages. If you encountered any trouble, head to the :ref:`troubleshooting` page. @@ -98,21 +96,21 @@ Install the latest, development version of libgpuarray following the Developer Installation ---------------------- -Install the developer version of Theano with: +Install the developer version of Aesara with: .. raw:: html -
git clone git://github.com/Theano/Theano.git
-    cd Theano
+    git clone git://github.com/pymc-devs/aesara.git
+    cd aesara
     <sudo> pip install <--user> <--no-deps> -e .
- Any argument between <...> is optional. - Use :red:`sudo` for a root installation. -- Use :blue:`user` for a user installation without admin rights. It will install Theano in your local site-packages. +- Use :blue:`user` for a user installation without admin rights. It will install Aesara in your local site-packages. -- Use :pink:`no-deps` when you don't want the dependencies of Theano to be installed through pip. This is important when they have already been installed as system packages. +- Use :pink:`no-deps` when you don't want the dependencies of Aesara to be installed through pip. This is important when they have already been installed as system packages. - :green:`-e` makes your installation *editable*, i.e., it links it to your source directory. diff --git a/doc/install_macos.txt b/doc/install_macos.txt index e13c60cc4f..6cf7364b15 100644 --- a/doc/install_macos.txt +++ b/doc/install_macos.txt @@ -6,18 +6,10 @@ Mac OS Installation Instructions ################################ -.. warning:: - If you want to install the bleeding-edge or development version of Theano - from GitHub, please make sure you are reading `the latest version of this - page `_. - -There are various ways to install Theano dependencies on a Mac. Here +There are various ways to install Aesara dependencies on a Mac. Here we describe the process in detail with Anaconda, Homebrew or MacPorts but if you did it differently and it worked, please let us know the -details on the `theano-users`_ mailing-list, so that we can add -alternative instructions here. - -.. _theano-users: http://groups.google.com/group/theano-users?pli=1 +details so that we can add alternative instructions. .. |PythonDistRecommended| replace:: The conda distribution is highly recommended .. |PlatformCompiler| replace:: ``clang`` (the system version) @@ -42,7 +34,7 @@ alternative instructions here. .. attention:: - Theano officially supports only clang on OS X. This can be installed + Aesara officially supports only clang on OS X. This can be installed by getting XCode from the App Store and running it once to install the command-line tools. @@ -78,7 +70,7 @@ Requirements through MacPorts (not recommended) ----------------------------------------------- Using `MacPorts `__ to install all required -Theano dependencies is easy, but be aware that it will take a long time +Aesara dependencies is easy, but be aware that it will take a long time (a few hours) to build and install everything. - MacPorts requires installing XCode first (which can be found in the @@ -96,7 +88,7 @@ Theano dependencies is easy, but be aware that it will take a long time $ sudo port install py27-numpy +atlas py27-scipy +atlas py27-pip - This will install all the required Theano dependencies. gcc will + This will install all the required Aesara dependencies. gcc will be automatically installed (since it is a SciPy dependency), but be aware that it takes a long time to compile (hours)! Having NumPy and SciPy linked with ATLAS (an optimized BLAS diff --git a/doc/install_others.txt b/doc/install_others.txt deleted file mode 100644 index cc70652436..0000000000 --- a/doc/install_others.txt +++ /dev/null @@ -1,52 +0,0 @@ -.. _install_others: - - -Other Platform-specific Installations -===================================== - -.. warning:: - - These instructions are not kept up to date. - -NVIDIA Jetson TX1 embedded platform ------------------------------------ - -.. 
code-block:: bash - - sudo apt-get install python-numpy python-scipy python-dev python-pip python-pytest g++ libblas-dev git - pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git --user # Need Theano 0.8 or more recent - -Gentoo ------- - -Brian Vandenberg emailed `installation instructions on Gentoo -`_, -focusing on how to install the appropriate dependencies. - -Nicolas Pinto provides `ebuild scripts `_. - -AWS Marketplace with Bitfusion AMI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -AWS EC2 AMI pre-installed with Nvidia drivers, CUDA, cuDNN, Theano, Keras, Lasagne, Python 2, Python 3, PyCuda, Scikit-Learn, Pandas, Enum34, iPython, and Jupyter. Note, as always there is no charge for Theano and other open software, however there is a charge for AWS hosting + Bitfusion. - -`Launch `_ an instance from the AWS Marketplace. - -Docker -~~~~~~ - -Builds of Theano are available as `Docker `_ -images: `Theano Docker (CPU) `_ or -`Theano Docker (CUDA) `_. These -are updated on a weekly basis with bleeding-edge builds of Theano. -Examples of running bash in a Docker container are as follows: - -.. code-block:: bash - - sudo docker run -it kaixhin/theano - sudo nvidia-docker run -it kaixhin/cuda-theano:7.0 - -For a guide to Docker, see the `official docs `_. -CUDA support requires `NVIDIA Docker `_. -For more details on how to use the Theano Docker images, -consult the `source project `_. diff --git a/doc/install_ubuntu.txt b/doc/install_ubuntu.txt index 099786eb19..4827be1044 100644 --- a/doc/install_ubuntu.txt +++ b/doc/install_ubuntu.txt @@ -7,9 +7,9 @@ Ubuntu Installation Instructions ################################ .. warning:: - If you want to install the bleeding-edge or development version of Theano + If you want to install the bleeding-edge or development version of Aesara from GitHub, please make sure you are reading `the latest version of this - page `_. + page `_. .. _gpu_linux: @@ -33,7 +33,7 @@ For Ubuntu 16.04 with cuda 7.5 .. code-block:: bash sudo apt-get install python-numpy python-scipy python-dev python-pip python-pytest g++ libopenblas-dev git graphviz - sudo pip install Theano + sudo pip install Aesara # cuda 7.5 don't support the default g++ version. Install an supported version and make it the default. sudo apt-get install g++-4.9 @@ -61,7 +61,7 @@ On 14.04, this will install Python 2 by default. If you want to use Python 3: .. code-block:: bash sudo apt-get install python3-numpy python3-scipy python3-dev python3-pip python3-pytest g++ libopenblas-dev git - sudo pip3 install Theano + sudo pip3 install Aesara For Ubuntu 11.04: @@ -86,7 +86,7 @@ some code that will help you. cd OpenBLAS make FC=gfortran sudo make PREFIX=/usr/local/ install - # Tell Theano to use OpenBLAS. + # Tell Aesara to use OpenBLAS. # This works only for the current user. - # Each Theano user on that computer should run that line. - echo -e "\n[blas]\nldflags = -lopenblas\n" >> ~/.theanorc + # Each Aesara user on that computer should run that line. + echo -e "\n[blas]\nldflags = -lopenblas\n" >> ~/.aesararc diff --git a/doc/install_windows.txt b/doc/install_windows.txt index 46474e7f9e..2b34b47bfd 100644 --- a/doc/install_windows.txt +++ b/doc/install_windows.txt @@ -7,9 +7,9 @@ Windows Installation Instructions ################################# .. 
warning:: - If you want to install the bleeding-edge or development version of Theano + If you want to install the bleeding-edge or development version of Aesara from GitHub, please make sure you are reading `the latest version of this - page `_. + page `_. .. |PythonDistRecommended| replace:: The conda distribution is highly recommended .. |PlatformCompiler| replace:: GCC compiler with ``g++`` (version >= ``4.2.*``), and Python development files @@ -30,7 +30,7 @@ Install requirements and optional packages * Arguments between <...> are optional. * ``m2w64-toolchain`` package provides a fully-compatible version of GCC and is then highly recommended. - * ``git`` package installs git source control through conda, which is required for the development versions of Theano and libgpuarray + * ``git`` package installs git source control through conda, which is required for the development versions of Aesara and libgpuarray .. _gpu_windows: @@ -49,14 +49,14 @@ to install the CUDA driver and the CUDA Toolkit. You must reboot the computer after the driver installation. -.. Installation of Theano and libgpuarray. +.. Installation of Aesara and libgpuarray. .. include:: install_generic.inc :start-after: .. _install_generic: Instructions for other Python distributions (not recommended) ============================================================= -If you plan to use Theano with other Python distributions, these are +If you plan to use Aesara with other Python distributions, these are generic guidelines to get a working environment: * Look for the mandatory requirements in the package manager's repositories of your distribution. Many diff --git a/doc/internal/how_to_release.txt b/doc/internal/how_to_release.txt index cddd23e007..bc6079f18b 100644 --- a/doc/internal/how_to_release.txt +++ b/doc/internal/how_to_release.txt @@ -4,163 +4,27 @@ How to make a release ================================================== -Update files -============ - -Update the NEWS.txt and move the old stuff in the HISTORY.txt file. -To update the NEWS.txt file, check all ticket closed for this release -and all commit log messages. Update the ``Theano/doc/index.txt`` *News* section. - -Update the "Vision"/"Vision State" in the file -``Theano/doc/introduction.txt``. - -Update the file ``.mailmap`` to clean up the list of contributor. - -Get a fresh copy of the repository -================================== - -Clone the code:: - - git clone git@github.com:Theano/Theano.git Theano-0.X - -It does not have to be in your PYTHONPATH. - Update the version number ========================= -``Theano/doc/conf.py`` should be updated in the following ways: +``Aesara/doc/conf.py`` should be updated in the following ways: * Change the upper copyright year to the current year if necessary. -Update the year in the ``Theano/LICENSE.txt`` file too, if necessary. - -``NEWS.txt`` usually contains the name and date of the release, change them -too. +Update the year in the ``Aesara/LICENSE.txt`` file too, if necessary. -Update the fallback version in ``theano/version.py``. - -Update the version in ``doc/install_generic.inc``. - -Update the code and the documentation for the theano flags +Update the code and the documentation for the aesara flags ``warn__ignore_bug_before`` to accept the new version. You must modify the -file ``theano/configdefaults.py`` and ``doc/library/config.txt``. +file ``aesara/configdefaults.py`` and ``doc/library/config.txt``. 
Tag the release =============== You will need to commit the previous changes, tag the resulting version, and -push that into the original repository. The syntax is something like the -following:: - - git commit -m "Modifications for 0.X.Y release" setup.py doc/conf.py NEWS.txt HISTORY.txt theano/configdefaults.py doc/library/config.txt - git tag -a rel-0.X.Y - git push - git push --tags - -This will trigger and build and upload of the conda package to the -mila-udem channel. - -The documentation will be automatically regenerated in the next few hours. - -Generate and upload the package -=============================== - -On PyPI -------- - -Set your umask to ``0022`` to ensure that the package file will be readable from other people. -To check your umask:: - - umask - -To set your umask:: - - umask 0022 - -Finally, use setuptools to build the release:: - - python setup.py sdist --formats=gztar - -Then use twine to upload the release - - twine upload dist/Theano-1.0.X.tax.gz - -This command uploads the package on pypi.python.org. To be able -to do that, you must register on PyPI (you can create an new account, or use -OpenID), and be listed among the "Package Index Owners" of Theano. - -There is a bug in some versions of distutils that raises a -UnicodeDecodeError if there are non-ASCII characters in NEWS.txt. You -would need to change NEWS.txt so it contains only ASCII characters (the -problem usually comes from diacritics in people's names). - -On mloss.org (for final releases only) --------------------------------------- - -Project page is at http://mloss.org/software/view/241/. -Account jaberg is listed as submitter. - -1. log in as jaberg to mloss -2. search for theano and click the logo -3. press 'update this project' on the left and change - - - the version number - - the download link - - the description of what has changed - -4. press save - -Make sure the "what's changed" text isn't too long because it will show up on -the front page of mloss. You have to indent bullet lines by 4 spaces I think in -the description. - -You can "update this project" and save lots of times to get the revision text -right. Just do not change the version number. - - -Update documentation server scripts -=================================== - -The documentation server runs the auto-generation script regularly. It -compiles the latest development version and puts it in -``$webroot/theano_versions/dev/``. It then checks if the release branch -has been updated and if it has, the release documentation is updated and -put into ``$webroot/theano/``. Finally, it checks for archived versions in -``$webroot/theano_versions/`` and generates a ``versions.json`` file -on the server that is used to populate the version switcher. - -If the release branch has changed, you must update the web server script. -Login to the ``deeplearning.net`` server as the user in charge of -document generation. In the shell script ``~/bin/updatedocs``, update the -variable ``release`` to the branch name for the current release. - -You can also add previous releases to the versions documentation archive. -In the script ``~/bin/updatedocs_versions``, change the variable -``Versions`` to the git tag of the documentation version to generate, -then run the script. - - -Announce the release -==================== - -Generate an e-mail from the template in ``EMAIL.txt``, including content -from ``NEWS.txt``. 
- -For final releases, send the e-mail to the following mailing lists: - -* theano-users -* theano-announce -* numpy-discussion@scipy.org -* scipy-user@python.org -* G+, Scientific Python: https://plus.google.com/communities/108773711053400791849 - -For release candidates, only e-mail: - -* theano-announce -* theano-dev -* theano-users +push that into the upstream/official repository. After that, create a new release +via GitHub Releases on the repository's page. The release tag must start with +``rel-`` in order to be recognized by the CI release process. -For alpha and beta releases, only e-mail: +This will trigger and build and upload of the PyPI and Conda packages. -* theano-dev -* theano-users +The documentation will be automatically regenerated as well. diff --git a/doc/internal/metadocumentation.txt b/doc/internal/metadocumentation.txt index 9a214a53ff..09a52d586f 100644 --- a/doc/internal/metadocumentation.txt +++ b/doc/internal/metadocumentation.txt @@ -14,14 +14,14 @@ The documentation will be generated in the ``html`` directory. .. code-block:: bash - cd Theano/ + cd Aesara/ python ./doc/scripts/docgen.py If you don't want to generate the pdf, do the following: .. code-block:: bash - cd Theano/ + cd Aesara/ python ./doc/scripts/docgen.py --nopdf @@ -87,28 +87,17 @@ It will not appear in the output generated. .. Nor will this. -How documentation is built on deeplearning.net +How documentation is built ---------------------------------------------- -The server that hosts the theano documentation runs a cron job roughly every -2 hours that fetches a fresh Theano install (clone, not just pull) and -executes the docgen.py script. It then over-writes the previous docs with the -newly generated ones. - -Note that the server will most definitely use a different version of sphinx -than yours so formatting could be slightly off, or even wrong. If you're -getting unxpected results and/or the auto-build of the documentation seems -broken, please contact theano-dev@. - -In the future, we might go back to the system of auto-refresh on push (though -that might increase the load of the server quite significantly). +TBD pylint --------------------------------------- pylint output is not autogenerated anymore. -Pylint documentation is generated using pylintrc file: ``Theano/doc/pylintrc`` +Pylint documentation is generated using pylintrc file: ``Aesara/doc/pylintrc`` .. _metadocumentation_nightly_build: @@ -116,19 +105,7 @@ Pylint documentation is generated using pylintrc file: ``Theano/doc/pylintrc`` The nightly build/tests process --------------------------------------- -We use the Jenkins software to run daily buildbots for Theano, libgpuarray and -the Deep Learning Tutorials. Jenkins downloads/updates the repos and then runs their test -scripts. Those scripts test the projects under various condition. -Jenkins also run some tests in 32 bit Python 2.7 and Python 3.4 for Theano. - -The output is emailed automatically to the `theano-buildbot`_ mailing list. The -jenkins log and test reports are published online: - -* `Theano buildbot `__ -* `gpuarray buildbot `__ - - -.. _theano-buildbot: https://groups.google.com/group/theano-buildbot +We use GitHub Actions to run daily builds and test for Aesara. TO WRITE --------------------------------------- @@ -136,4 +113,3 @@ TO WRITE *There is other stuff to document here, e.g.:* * We also want examples of good documentation, to show people how to write ReST. 
- diff --git a/doc/internal/python.txt b/doc/internal/python.txt index 396f617d35..4983fec3f1 100644 --- a/doc/internal/python.txt +++ b/doc/internal/python.txt @@ -11,7 +11,7 @@ give you a warm feeling in your stomach. Non-Basic Python features ------------------------- -Theano doesn't use your grandfather's python. +Aesara doesn't use your grandfather's python. * properties @@ -41,7 +41,7 @@ Theano doesn't use your grandfather's python. >>> f.a = 5 >>> f() 5 - + * Warning about mutual imports: * script a.py file defined a class A. @@ -58,4 +58,3 @@ Theano doesn't use your grandfather's python. ``__main__.``, instead of type ``.``. This should never happen under normal usage, and can cause problems (like the one you are/were experiencing). - diff --git a/doc/internal/release.txt b/doc/internal/release.txt deleted file mode 100644 index d857a6db35..0000000000 --- a/doc/internal/release.txt +++ /dev/null @@ -1,70 +0,0 @@ -.. _release: - -======= -Release -======= - -Having a release system has many benefits. First and foremost, it makes trying -out Theano easy. You can install a stable version of Theano, without having to -worry about the current state of the repository. While we usually try NOT to -break the trunk, mistakes can happen. This also greatly simplifies the -installation process: mercurial is no longer required and certain python -dependencies can be handled automatically (numpy for now, cython later). - -The Theano release plan is detailed below. Comments and/or suggestions are -welcome on the mailing list. - -1) We aim to update Theano several times a year. These releases will be made - as new features are implemented. - -2) Urgent releases will only be made when a bug generating incorrect - output is discovered and fixed. - -3) Each release must satisfy the following criteria. Non-compliance will - result in us delaying or skipping the release in question. - - 1) No regression errors. - 2) No known, silent errors. - 3) No errors giving incorrect results. - 4) No test errors/failures, except for known errors. - - 1) Known errors should not be used to encode "feature wish lists", as - is currently the case. - 2) Incorrect results should raise errors and not known errors (this - has always been the case) - 3) All known errors should have a ticket and a reference to that - ticket in the error message. - - 5) All commits should have been reviewed, to ensure none of the above - problems are introduced. - -4) The release numbers will follow the X.Y.Z scheme: - - 1) We update Z for small urgent bugs or support for new versions of dependencies. - 2) We update Y for interface changes and/or significant features - we wish to publicize. - 3) The Theano v1.0.0 release will be made when the interface is deemed - stable enough and covers most of numpy's interface. - -5) The trunk will be tagged on each release. - -6) Each release will be uploaded to pypi.python.org, mloss.org and freshmeat.net - -7) Release emails will be sent to theano-users, theano-announce, numpy-discussion@scipy.org and scipy-user@scipy.org . - -Optional: - -8) A 1-week scrum might take place before a release, in order to fix bugs - which would otherwise prevent a release. - - 1) Occasional deadlines might cause us to skip a release. - 2) Everybody can (and should) participate, even people on the mailing - list. - 3) The scrum should encourage people to finish what they have already - started (missing documentation, missing test, ...). This should help - push out new features and keep the documentation up to date. 
- 4) If possible, aim for the inclusion of one new interesting feature. - 5) Participating in the scrum should benefit all those involved, as you - will learn more about our tools and help develop them in the process. A - good indication that you should participate is if you have a need for a - feature which is not yet implemented. diff --git a/doc/introduction.txt b/doc/introduction.txt index d0d7557649..02bffc37cb 100644 --- a/doc/introduction.txt +++ b/doc/introduction.txt @@ -2,26 +2,26 @@ .. _introduction: ================== -Theano at a Glance +Aesara at a Glance ================== -Theano is a Python library that lets you define, optimize, and evaluate +Aesara is a Python library that lets you define, optimize, and evaluate mathematical expressions, especially ones with multi-dimensional arrays -(numpy.ndarray). Using Theano it is +(numpy.ndarray). Using Aesara it is possible to attain speeds rivaling hand-crafted C implementations for problems involving large amounts of data. It can also surpass C on a CPU by many orders of magnitude by taking advantage of recent GPUs. -Theano combines aspects of a computer algebra system (CAS) with aspects of an +Aesara combines aspects of a computer algebra system (CAS) with aspects of an optimizing compiler. It can also generate customized C code for many mathematical operations. This combination of CAS with optimizing compilation is particularly useful for tasks in which complicated mathematical expressions are evaluated repeatedly and evaluation speed is critical. For situations -where many different expressions are each evaluated once Theano can minimize +where many different expressions are each evaluated once Aesara can minimize the amount of compilation/analysis overhead, but still provide symbolic features such as automatic differentiation. -Theano's compiler applies many optimizations of varying complexity to +Aesara's compiler applies many optimizations of varying complexity to these symbolic expressions. These optimizations include, but are not limited to: @@ -37,17 +37,19 @@ limited to: * improvements to numerical stability (e.g. :math:`\log(1+\exp(x))` and :math:`\log(\sum_i \exp(x[i]))`) * for a complete list, see :ref:`optimizations` -Theano was written at the LISA_ lab to support rapid development of -efficient machine learning algorithms. Theano is -named after the `Greek mathematician`_, who may have been Pythagoras' -wife. Theano is released under a BSD license (:ref:`link `). +The library that Aesara is based on, Theano, was written at the LISA_ lab to +support rapid development of efficient machine learning algorithms. Theano was +named after the `Greek mathematician`_, who may have been Pythagoras' wife. +Aesara is an alleged daughter of Pythagoras and Theano. + +Aesara is released under a BSD license (:ref:`link `). Sneak peek ========== -Here is an example of how to use Theano. It doesn't show off many of -Theano's features, but it illustrates concretely what Theano is. +Here is an example of how to use Aesara. It doesn't show off many of +Aesara's features, but it illustrates concretely what Aesara is. .. If you modify this code, also change : @@ -55,8 +57,8 @@ Theano's features, but it illustrates concretely what Theano is. .. code-block:: python - import theano - from theano import tensor as tt + import aesara + from aesara import tensor as tt # declare two symbolic floating-point scalars a = tt.dscalar() @@ -67,14 +69,14 @@ Theano's features, but it illustrates concretely what Theano is. 
# convert the expression into a callable object that takes (a,b) # values as input and computes a value for c - f = theano.function([a,b], c) + f = aesara.function([a,b], c) # bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c' assert 4.0 == f(1.5, 2.5) -Theano is not a programming language in the normal sense because you -write a program in Python that builds expressions for Theano. Still it +Aesara is not a programming language in the normal sense because you +write a program in Python that builds expressions for Aesara. Still it is like a programming language in the sense that you have to - declare variables (``a,b``) and give their types @@ -83,9 +85,9 @@ is like a programming language in the sense that you have to - compile expression graphs to functions in order to use them for computation. -It is good to think of ``theano.function`` as the interface to a +It is good to think of ``aesara.function`` as the interface to a compiler which builds a callable object from a purely symbolic graph. -One of Theano's most important features is that ``theano.function`` +One of Aesara's most important features is that ``aesara.function`` can optimize a graph and even compile some or all of it into native machine instructions. @@ -93,28 +95,28 @@ machine instructions. What does it do that they don't? ================================ -Theano is a Python library and optimizing compiler for manipulating +Aesara is a Python library and optimizing compiler for manipulating and evaluating expressions, especially matrix-valued ones. Manipulation of matrices is typically done using the numpy -package, so what does Theano do that Python and numpy do not? +package, so what does Aesara do that Python and numpy do not? -- *execution speed optimizations*: Theano can use `g++` or `nvcc` to compile +- *execution speed optimizations*: Aesara can use `g++` or `nvcc` to compile parts your expression graph into CPU or GPU instructions, which run much faster than pure Python. -- *symbolic differentiation*: Theano can automatically build symbolic graphs +- *symbolic differentiation*: Aesara can automatically build symbolic graphs for computing gradients. -- *stability optimizations*: Theano can recognize [some] numerically unstable +- *stability optimizations*: Aesara can recognize [some] numerically unstable expressions and compute them with more stable algorithms. -The closest Python package to Theano is sympy_. -Theano focuses more on tensor expressions than Sympy, and has more machinery +The closest Python package to Aesara is sympy_. +Aesara focuses more on tensor expressions than Sympy, and has more machinery for compilation. Sympy has more sophisticated algebra rules and can handle a wider variety of mathematical operations (such as series, limits, and integrals). If numpy_ is to be compared to MATLAB_ and sympy_ to Mathematica_, -Theano is a sort of hybrid of the two which tries to combine the best of +Aesara is a sort of hybrid of the two which tries to combine the best of both worlds. @@ -122,100 +124,25 @@ Getting started =============== :ref:`install` - Instructions to download and install Theano on your system. + Instructions to download and install Aesara on your system. :ref:`tutorial` - Getting started with Theano's basic features. Go here if you are + Getting started with Aesara's basic features. Go here if you are new! :ref:`libdoc` - Details of what Theano provides. It is recommended to go through + Details of what Aesara provides. It is recommended to go through the :ref:`tutorial` first though. 
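Building on the sneak-peek example above, a short sketch, not part of the patch, of the symbolic differentiation feature mentioned in the comparison with numpy. It assumes ``aesara.grad`` keeps the ``grad`` entry point referenced elsewhere in this patch:

.. code-block:: python

    import aesara
    from aesara import tensor as tt

    a = tt.dscalar("a")
    b = tt.dscalar("b")
    c = a ** 2 + b ** 2

    # Symbolic gradient of c with respect to a, compiled alongside c itself.
    dc_da = aesara.grad(c, a)

    f = aesara.function([a, b], [c, dc_da])
    print(f(1.5, 2.5))  # c = 8.5 and dc/da = 3.0 at (1.5, 2.5)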
-A PDF version of the online documentation may be found `here -`_. - - -Theano Vision -============= - -This is the vision we have for Theano. This is give people an idea of what to -expect in the future of Theano, but we can't promise to implement all -of it. This should also help you to understand where Theano fits in relation -to other computational tools. - -* Support tensor and sparse operations -* Support linear algebra operations -* Graph Transformations - * Differentiation/higher order differentiation - * 'R' and 'L' differential operators - * Speed/memory optimizations - * Numerical stability optimizations -* Can use many compiled languages, instructions sets: C/C++, CUDA, OpenCL, PTX, CAL, AVX, ... -* Lazy evaluation -* Loop -* Parallel execution (SIMD, multi-core, multi-node on cluster, - multi-node distributed) -* Support all NumPy/basic SciPy functionality -* Easy wrapping of library functions in Theano - -Note: There is no short term plan to support multi-node computation. - -Theano Vision State -=================== - -Here is the state of that vision as of November 15th, 2017 (after Theano 1.0.0): - -* `MILA will stop developing Theano. `_. - We will provide support for one year, starting from ``1.0`` release (November 15th, 2017 to November 15th, 2018). -* We support tensors using the `numpy.ndarray` object and we support many operations on them. -* We support sparse types by using the `scipy.{csc,csr,bsr}_matrix` object and support some operations on them. -* We have implementing/wrapping more advanced linear algebra operations. Still more possible. -* We have basic support for the creation of new operations from graphs at runtime. It supports well gradient overload - for every input and inlining at the start of compilation. We don't cover well the case when it is not inlined. -* We have many graph transformations that cover the 4 categories listed above. -* We can improve the graph transformation with better storage optimization - and instruction selection. - - * Similar to auto-tuning during the optimization phase, but this - doesn't apply to only 1 op. - * Example of use: Determine if we should move computation to the - GPU or not depending on the input size. - -* We support Python 2 and Python 3. -* We have a new CUDA backend for tensors with many dtype support. - -* Loops work, but not all related optimizations are currently done. -* The cvm linker allows lazy evaluation. It is the current default linker. - - * How to have `DebugMode` check it? Right now, DebugMode checks the computation non-lazily. - -* SIMD parallelism on the CPU comes from the compiler. -* Multi-core parallelism support limited. - If the external BLAS implementation supports it, - many dot are parallelized via gemm, gemv and ger. - Also, element-wise operation are supported. See :ref:`tut_multi_cores`. - -* No multi-node support. -* Most, but not all NumPy functions/aliases are implemented. - - * https://github.com/Theano/Theano/issues/1080 - -* Wrapping an existing Python function in easy and documented. -* We know how to separate the shared variable memory - storage location from its object type (tensor, sparse, dtype, broadcast - flags), but we need to do it. - - Contact us ========== -Questions and bug reports should -be submitted in the form of an issue at pymc-theano-dev_ +Questions and bug reports should be submitted in the form of an issue at +pymc-aesara-dev_ -We welcome all kinds of contributions. If you have any questions -regarding how to extend Theano, please feel free to ask. 
+We welcome all kinds of contributions. If you have any questions regarding how +to extend Aesara, please feel free to ask. .. _LISA: https://mila.umontreal.ca/ @@ -227,4 +154,4 @@ regarding how to extend Theano, please feel free to ask. .. _MATLAB: http://www.mathworks.com/products/matlab/ .. _Mathematica: http://www.wolfram.com/mathematica/ -.. _pymc-theano-dev: https://github.com/pymc-devs/Theano-PyMC/issues +.. _pymc-aesara-dev: https://github.com/pymc-devs/aesara/issues diff --git a/doc/library/compile/debugmode.txt b/doc/library/compile/debugmode.txt index 286313ca58..54267ae949 100644 --- a/doc/library/compile/debugmode.txt +++ b/doc/library/compile/debugmode.txt @@ -5,7 +5,7 @@ :mod:`debugmode` ================= -.. module:: theano.compile.debugmode +.. module:: aesara.compile.debugmode :platform: Unix, Windows :synopsis: defines DebugMode .. moduleauthor:: LISA @@ -27,13 +27,13 @@ DebugMode can be used as follows: .. testcode:: - import theano - from theano import tensor as tt - from theano.compile.debugmode import DebugMode + import aesara + from aesara import tensor as tt + from aesara.compile.debugmode import DebugMode x = tt.dscalar('x') - f = theano.function([x], 10*x, mode='DebugMode') + f = aesara.function([x], 10*x, mode='DebugMode') f(5) f(0) @@ -42,12 +42,12 @@ DebugMode can be used as follows: It can also be used by setting the configuration variable :attr:`config.mode`, or passing a `DebugMode` instance, as in ->>> f = theano.function([x], 10*x, mode=DebugMode(check_c_code=False)) +>>> f = aesara.function([x], 10*x, mode=DebugMode(check_c_code=False)) If any problem is detected, DebugMode will raise an exception according to what went wrong, either at call time (``f(5)``) or compile time ( -``f = theano.function(x, 10*x, mode='DebugMode')``). These exceptions -should *not* be ignored; talk to your local Theano guru or email the +``f = aesara.function(x, 10*x, mode='DebugMode')``). These exceptions +should *not* be ignored; talk to your local Aesara guru or email the users list if you cannot make the exception go away. Some kinds of errors can only be detected for certain input value combinations. @@ -63,7 +63,7 @@ Reference .. class:: DebugMode(Mode) - Evaluation Mode that detects internal theano errors. + Evaluation Mode that detects internal aesara errors. This mode catches several kinds of internal error: diff --git a/doc/library/compile/function.txt b/doc/library/compile/function.txt index f6821cfaef..8a7835517f 100644 --- a/doc/library/compile/function.txt +++ b/doc/library/compile/function.txt @@ -2,25 +2,25 @@ .. _usingfunction: =========================================== -:mod:`function` - defines theano.function +:mod:`function` - defines aesara.function =========================================== -.. module:: theano.compile.function +.. module:: aesara.compile.function :platform: Unix, Windows - :synopsis: defines theano.function and related classes + :synopsis: defines aesara.function and related classes .. moduleauthor:: LISA Guide ===== -This module provides :func:`function`, commonly accessed as `theano.function`, +This module provides :func:`function`, commonly accessed as `aesara.function`, the interface for compiling graphs into callable objects. You've already seen example usage in the basic tutorial... something like this: ->>> import theano ->>> x = theano.tensor.dscalar() ->>> f = theano.function([x], 2*x) +>>> import aesara +>>> x = aesara.tensor.dscalar() +>>> f = aesara.function([x], 2*x) >>> f(4) array(8.0) @@ -106,7 +106,7 @@ Reference .. 
function:: function(inputs, outputs, mode=None, updates=None, givens=None, no_default_updates=False, accept_inplace=False, name=None, rebuild_strict=True, allow_input_downcast=None, profile=None, on_unused_input='raise') - Return a :class:`callable object ` that will calculate `outputs` from `inputs`. + Return a :class:`callable object ` that will calculate `outputs` from `inputs`. :type params: list of either Variable or In instances, but not shared variables. @@ -174,7 +174,7 @@ Reference list is not used in the graph. Possible values are 'raise', 'warn', and 'ignore'. - :rtype: :class:`Function ` + :rtype: :class:`Function ` instance :returns: a callable object that will compute the outputs (given the inputs) @@ -188,7 +188,7 @@ Reference about how output variables should be returned. The default is typically 'FAST_RUN' but this can be changed in - :doc:`theano.config <../config>`. The mode + :doc:`aesara.config <../config>`. The mode argument controls the sort of optimizations that will be applied to the graph, and the way the optimized graph will be evaluated. @@ -212,7 +212,7 @@ Reference givens are different from optimizations in that Var2 is not expected to be equivalent to Var1. -.. autofunction:: theano.compile.function.function_dump +.. autofunction:: aesara.compile.function.function_dump -.. autoclass:: theano.compile.function.types.Function +.. autoclass:: aesara.compile.function.types.Function :members: free, copy, __call__ diff --git a/doc/library/compile/io.txt b/doc/library/compile/io.txt index 887ade06d9..b7dedd49f3 100644 --- a/doc/library/compile/io.txt +++ b/doc/library/compile/io.txt @@ -7,10 +7,10 @@ .. _function_inputs: =========================================== -:mod:`io` - defines theano.function [TODO] +:mod:`io` - defines aesara.function [TODO] =========================================== -.. module:: theano.compile.io +.. module:: aesara.compile.io :platform: Unix, Windows :synopsis: defines In and Out .. moduleauthor:: LISA @@ -19,7 +19,7 @@ Inputs ====== -The ``inputs`` argument to ``theano.function`` is a list, containing the ``Variable`` instances for which values will be specified at the time of the function call. But inputs can be more than just Variables. +The ``inputs`` argument to ``aesara.function`` is a list, containing the ``Variable`` instances for which values will be specified at the time of the function call. But inputs can be more than just Variables. ``In`` instances let us attach properties to ``Variables`` to tell function more about how to use them. @@ -80,9 +80,9 @@ A non-None `value` argument makes an In() instance an optional parameter of the compiled function. For example, in the following code we are defining an arity-2 function ``inc``. ->>> import theano.tensor as tt ->>> from theano import function ->>> from theano.compile.io import In +>>> import aesara.tensor as tt +>>> from aesara import function +>>> from aesara.compile.io import In >>> u, x, s = tt.scalars('u', 'x', 's') >>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], []) @@ -123,7 +123,7 @@ array(10.0) Input Argument Restrictions --------------------------- -The following restrictions apply to the inputs to ``theano.function``: +The following restrictions apply to the inputs to ``aesara.function``: - Every input list element must be a valid ``In`` instance, or must be upgradable to a valid ``In`` instance. See the shortcut rules below. @@ -152,7 +152,7 @@ instance explicitly with the ``autoname`` flag set to False. 
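A runnable restatement, offered only as a sketch and assuming the renamed ``aesara.compile.io.In`` behaves as the original documented above: ``x`` gets a default value, ``s`` carries state through an ``update`` expression, and the final line uses the container access that the next section documents:

.. code-block:: python

    import aesara.tensor as tt
    from aesara import function
    from aesara.compile.io import In

    u, x, s = tt.scalars("u", "x", "s")

    # x defaults to 3; s starts at 10.0 and is replaced by s + x*u on each call.
    inc = function([u, In(x, value=3), In(s, update=(s + x * u), value=10.0)], [])

    inc(2)         # s becomes 10 + 3*2 = 16
    inc(2, 5)      # s becomes 16 + 5*2 = 26
    print(inc[s])  # read the state container: 26.0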
Access to function values and containers ---------------------------------------- -For each input, ``theano.function`` will create a ``Container`` if +For each input, ``aesara.function`` will create a ``Container`` if ``value`` was not already a ``Container`` (or if ``implicit`` was ``False``). At the time of a function call, each of these containers must be filled with a value. Each input (but especially ones with a default value or an update expression) may have a @@ -183,7 +183,7 @@ method to access values by indexing a Function directly by typing To show some examples of these access methods... ->>> from theano import tensor as tt, function +>>> from aesara import tensor as tt, function >>> a, b, c = tt.scalars('xys') # set the internal names of graph nodes >>> # Note that the name of c is 's', not 'c'! >>> fn = function([a, b, ((c, c+a+b), 10.0)], []) @@ -235,15 +235,15 @@ Every element of the inputs list will be upgraded to an In instance if necessary Example: ->>> import theano ->>> from theano import tensor as tt ->>> from theano.compile.io import In +>>> import aesara +>>> from aesara import tensor as tt +>>> from aesara.compile.io import In >>> x = tt.scalar() >>> y = tt.scalar('y') >>> z = tt.scalar('z') >>> w = tt.scalar('w') ->>> fn = theano.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)], +>>> fn = aesara.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)], ... outputs=x + y + z) >>> # the first two arguments are required and the last two are >>> # optional and initialized to 42 and 0, respectively. @@ -307,24 +307,24 @@ If a single ``Variable`` or ``Out`` instance is given as argument, then the comp If a list of ``Variable`` or ``Out`` instances is given as argument, then the compiled function will return a list of their values. >>> import numpy ->>> from theano.compile.io import Out +>>> from aesara.compile.io import Out >>> x, y, s = tt.matrices('xys') >>> # print a list of 2 ndarrays ->>> fn1 = theano.function([x], [x+x, Out((x+x).T, borrow=True)]) +>>> fn1 = aesara.function([x], [x+x, Out((x+x).T, borrow=True)]) >>> fn1(numpy.asarray([[1,0],[0,1]])) [array([[ 2., 0.], [ 0., 2.]]), array([[ 2., 0.], [ 0., 2.]])] >>> # print a list of 1 ndarray ->>> fn2 = theano.function([x], [x+x]) +>>> fn2 = aesara.function([x], [x+x]) >>> fn2(numpy.asarray([[1,0],[0,1]])) [array([[ 2., 0.], [ 0., 2.]])] >>> # print an ndarray ->>> fn3 = theano.function([x], outputs=x+x) +>>> fn3 = aesara.function([x], outputs=x+x) >>> fn3(numpy.asarray([[1,0],[0,1]])) array([[ 2., 0.], [ 0., 2.]]) diff --git a/doc/library/compile/mode.txt b/doc/library/compile/mode.txt index 06267d1860..9af851cb89 100644 --- a/doc/library/compile/mode.txt +++ b/doc/library/compile/mode.txt @@ -5,7 +5,7 @@ :mod:`mode` -- controlling compilation ====================================== -.. module:: theano.compile.mode +.. module:: aesara.compile.mode :platform: Unix, Windows :synopsis: controlling compilation .. moduleauthor:: LISA @@ -13,10 +13,10 @@ Guide ===== -The ``mode`` parameter to :func:`theano.function` controls how the +The ``mode`` parameter to :func:`aesara.function` controls how the inputs-to-outputs graph is transformed into a callable object. -Theano defines the following modes by name: +Aesara defines the following modes by name: - ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations. - ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible. 
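A short sketch, not part of the patch, of selecting one of the named modes listed above on a per-function basis. It assumes the mode names are accepted by the ``mode`` keyword of ``aesara.function``, as the DebugMode example earlier in this patch suggests:

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    x = tt.dscalar("x")

    # Few optimizations, Python implementations only: fast to compile.
    f_quick = aesara.function([x], 10 * x, mode="FAST_COMPILE")

    # All optimizations, C implementations where possible: fast to run.
    f_fast = aesara.function([x], 10 * x, mode="FAST_RUN")

    assert f_quick(2.0) == f_fast(2.0) == 20.0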
@@ -26,7 +26,7 @@ Theano defines the following modes by name: The default mode is typically ``FAST_RUN``, but it can be controlled via the configuration variable :attr:`config.mode`, which can be -overridden by passing the keyword argument to :func:`theano.function`. +overridden by passing the keyword argument to :func:`aesara.function`. .. TODO:: diff --git a/doc/library/compile/nanguardmode.txt b/doc/library/compile/nanguardmode.txt index c8baa23452..36e9863ea6 100644 --- a/doc/library/compile/nanguardmode.txt +++ b/doc/library/compile/nanguardmode.txt @@ -5,7 +5,7 @@ :mod:`nanguardmode` =================== -.. module:: theano.compile.nanguardmode +.. module:: aesara.compile.nanguardmode :platform: Unix, Windows :synopsis: defines NanGuardMode .. moduleauthor:: LISA @@ -24,19 +24,19 @@ NanGuardMode can be used as follows: .. testcode:: import numpy - import theano - import theano.tensor as tt - from theano.compile.nanguardmode import NanGuardMode + import aesara + import aesara.tensor as tt + from aesara.compile.nanguardmode import NanGuardMode x = tt.matrix() - w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX)) + w = aesara.shared(numpy.random.randn(5, 7).astype(aesara.config.floatX)) y = tt.dot(x, w) - fun = theano.function( + fun = aesara.function( [x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) -While using the theano function ``fun``, it will monitor the values of each +While using the aesara function ``fun``, it will monitor the values of each input and output variable of each node. When abnormal values are detected, it raises an error to indicate which node yields the NaNs. For example, if we pass the following values to ``fun``: @@ -44,7 +44,7 @@ example, if we pass the following values to ``fun``: .. testcode:: infa = numpy.tile( - (numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5)) + (numpy.asarray(100.) ** 1000000).astype(aesara.config.floatX), (3, 5)) fun(infa) .. testoutput:: @@ -71,4 +71,4 @@ set to be ``True`` by default. Reference ========= -.. autoclass:: theano.compile.nanguardmode.NanGuardMode +.. autoclass:: aesara.compile.nanguardmode.NanGuardMode diff --git a/doc/library/compile/opfromgraph.txt b/doc/library/compile/opfromgraph.txt index 2ea594edb2..20a26c0be3 100644 --- a/doc/library/compile/opfromgraph.txt +++ b/doc/library/compile/opfromgraph.txt @@ -6,14 +6,14 @@ OpFromGraph =========== -This page describes :class:`theano.compile.builders.OpFromGraph -`, an Op that allows to -encapsulate a Theano graph in an op. +This page describes :class:`aesara.compile.builders.OpFromGraph +`, an Op that allows to +encapsulate an Aesara graph in an op. This can be used to encapsulate some functionality in one block. It is -useful to scale Theano compilation for regular bigger graphs when we +useful to scale Aesara compilation for regular bigger graphs when we reuse that encapsulated functionality with different inputs many -times. Due to this encapsulation, it can make Theano compilation phase +times. Due to this encapsulation, it can make Aesara compilation phase faster for graphs with many nodes. Using this for small graphs is not recommended as it disables @@ -26,4 +26,4 @@ optimizations between what is inside the encapsulation and outside of it. -.. autoclass:: theano.compile.builders.OpFromGraph +.. 
autoclass:: aesara.compile.builders.OpFromGraph diff --git a/doc/library/compile/ops.txt b/doc/library/compile/ops.txt index 0c81bf12b2..7689f552de 100644 --- a/doc/library/compile/ops.txt +++ b/doc/library/compile/ops.txt @@ -2,5 +2,5 @@ :mod:`ops` -- Some Common Ops and extra Ops stuff ================================================== -.. automodule:: theano.compile.ops +.. automodule:: aesara.compile.ops :members: diff --git a/doc/library/compile/profilemode.txt b/doc/library/compile/profilemode.txt index 4435a49b6b..32401da0c8 100644 --- a/doc/library/compile/profilemode.txt +++ b/doc/library/compile/profilemode.txt @@ -3,13 +3,13 @@ .. _profilemode: ================================================ -:mod:`profilemode` -- profiling Theano functions +:mod:`profilemode` -- profiling Aesara functions ================================================ -.. module:: theano.compile.profilemode +.. module:: aesara.compile.profilemode :platform: Unix, Windows - :synopsis: profiling Theano functions with ProfileMode + :synopsis: profiling Aesara functions with ProfileMode .. moduleauthor:: LISA Guide diff --git a/doc/library/compile/shared.txt b/doc/library/compile/shared.txt index 80945de417..86b176c399 100644 --- a/doc/library/compile/shared.txt +++ b/doc/library/compile/shared.txt @@ -2,12 +2,12 @@ .. _libdoc_compile_shared: =========================================== -:mod:`shared` - defines theano.shared +:mod:`shared` - defines aesara.shared =========================================== -.. module:: theano.compile.sharedvalue +.. module:: aesara.compile.sharedvalue :platform: Unix, Windows - :synopsis: defines theano.shared and related classes + :synopsis: defines aesara.shared and related classes .. moduleauthor:: LISA @@ -20,10 +20,10 @@ The user-friendly constructor is :func:`shared` .. method:: get_value(self, borrow=False, return_internal_type=False) - + :param borrow: True to permit returning of an object aliased to internal memory. :type borrow: bool - + :param return_internal_type: True to permit the returning of an arbitrary type object used internally to store the shared variable. :type return_internal_type: bool @@ -36,7 +36,7 @@ ``borrow=True`` and ``return_internal_type=True``, it will always return the original data, not a copy, but this can be a GPU object. - + .. method:: set_value(self, new_value, borrow=False) :param new_value: The new value. diff --git a/doc/library/config.txt b/doc/library/config.txt index b9d36880fd..481c74227e 100644 --- a/doc/library/config.txt +++ b/doc/library/config.txt @@ -1,7 +1,7 @@ .. _libdoc_config: ======================================= -:mod:`config` -- Theano Configuration +:mod:`config` -- Aesara Configuration ======================================= .. module:: config @@ -13,58 +13,58 @@ Guide ===== -The config module contains many ``attributes`` that modify Theano's behavior. Many of these -attributes are consulted during the import of the ``theano`` module and many are assumed to be +The config module contains many ``attributes`` that modify Aesara's behavior. Many of these +attributes are consulted during the import of the ``aesara`` module and many are assumed to be read-only. *As a rule, the attributes in this module should not be modified by user code.* -Theano's code comes with default values for these attributes, but you can -override them from your .theanorc file, and override those values in turn by -the :envvar:`THEANO_FLAGS` environment variable. 
+Aesara's code comes with default values for these attributes, but you can +override them from your ``.aesararc`` file, and override those values in turn by +the :envvar:`AESARA_FLAGS` environment variable. The order of precedence is: -1. an assignment to ``theano.config.`` -2. an assignment in :envvar:`THEANO_FLAGS` -3. an assignment in the .theanorc file (or the file indicated in :envvar:`THEANORC`) +1. an assignment to ``aesara.config.`` +2. an assignment in :envvar:`AESARA_FLAGS` +3. an assignment in the ``.aesararc`` file (or the file indicated in :envvar:`AESARARC`) You can print out the current/effective configuration at any time by printing -``theano.config``. For example, to see a list of all active configuration +``aesara.config``. For example, to see a list of all active configuration variables, type this from the command-line: .. code-block:: bash - python -c 'import theano; print(theano.config)' | less + python -c 'import aesara; print(aesara.config)' | less Environment Variables ===================== -.. envvar:: THEANO_FLAGS +.. envvar:: AESARA_FLAGS This is a list of comma-delimited key=value pairs that control - Theano's behavior. + Aesara's behavior. - For example, in bash, you can override your :envvar:`THEANORC` defaults + For example, in bash, you can override your :envvar:`AESARARC` defaults for .py by typing this: .. code-block:: bash - THEANO_FLAGS='floatX=float32,device=cuda0,gpuarray__preallocate=1' python .py + AESARA_FLAGS='floatX=float32,device=cuda0,gpuarray__preallocate=1' python .py - If a value is defined several times in ``THEANO_FLAGS``, + If a value is defined several times in ``AESARA_FLAGS``, the right-most definition is used. So, for instance, if - ``THEANO_FLAGS='device=cpu,device=cuda0'``, then cuda0 will be used. + ``AESARA_FLAGS='device=cpu,device=cuda0'``, then cuda0 will be used. -.. envvar:: THEANORC +.. envvar:: AESARARC - The location[s] of the .theanorc file[s] in ConfigParser format. - It defaults to ``$HOME/.theanorc``. On Windows, it defaults to - ``$HOME/.theanorc:$HOME/.theanorc.txt`` to make Windows users' life + The location(s) of the ``.aesararc`` file(s) in `ConfigParser` format. + It defaults to ``$HOME/.aesararc``. On Windows, it defaults to + ``$HOME/.aesararc:$HOME/.aesararc.txt`` to make Windows users' life easier. - Here is the .theanorc equivalent to the THEANO_FLAGS in the example above: + Here is the ``.aesararc`` equivalent to the ``AESARA_FLAGS`` in the example above: .. code-block:: cfg @@ -83,24 +83,24 @@ Environment Variables section (e.g. ``[gpuarray]``, ``[dnn.conv]``). Multiple configuration files can be specified by separating them with ':' - characters (as in $PATH). Multiple configuration files will be merged, + characters (as in ``$PATH``). Multiple configuration files will be merged, with later (right-most) files taking priority over earlier files in the case that multiple files specify values for a common configuration option. For example, to override system-wide settings with personal ones, - set ``THEANORC=/etc/theanorc:~/.theanorc``. To load configuration files in - the current working directory, append ``.theanorc`` to the list of configuration - files, e.g. ``THEANORC=~/.theanorc:.theanorc``. + set ``AESARARC=/etc/aesararc:~/.aesararc``. To load configuration files in + the current working directory, append ``.aesararc`` to the list of configuration + files, e.g. ``AESARARC=~/.aesararc:.aesararc``. 
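For reference, a sketch of what such an ``.aesararc`` file might contain, mirroring the ``AESARA_FLAGS`` example above (ConfigParser format as described; options that live in a subsection, such as ``gpuarray__preallocate``, go under their own section header):

.. code-block:: cfg

    [global]
    floatX = float32
    device = cuda0

    [gpuarray]
    preallocate = 1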
Config Attributes ===================== The list below describes some of the more common and important flags that you might want to use. For the complete list (including documentation), -import theano and print the config variable, as in: +import aesara and print the config variable, as in: .. code-block:: bash - python -c 'import theano; print(theano.config)' | less + python -c 'import aesara; print(aesara.config)' | less .. attribute:: device @@ -130,7 +130,7 @@ import theano and print the config variable, as in: we disable the GPU. If ``False`` and ``device=gpu*``, and if the specified device cannot be used, we warn and fall back to the CPU. - This is useful to run Theano's tests on a computer with a GPU, but + This is useful to run Aesara's tests on a computer with a GPU, but without running the GPU tests. This flag's value cannot be modified during the program execution. @@ -141,7 +141,7 @@ import theano and print the config variable, as in: ``'opencl0:0'``, ``'opencl0:1'``, ... Initialize the gpu device to use. - When its value is ``'cuda*'`` or ``'opencl*'``, the theano + When its value is ``'cuda*'`` or ``'opencl*'``, the aesara flag :attr:`device` must be ``'cpu'``. Unlike :attr:`device`, setting this flag to a specific GPU will not try to use this device by default, in particular it will **not** move @@ -167,7 +167,7 @@ import theano and print the config variable, as in: Default: ``'float64'`` This sets the default dtype returned by ``tensor.matrix()``, ``tensor.vector()``, - and similar functions. It also sets the default Theano bit width for + and similar functions. It also sets the default Aesara bit width for arguments passed as Python floating-point numbers. .. attribute:: warn_float64 @@ -198,14 +198,14 @@ import theano and print the config variable, as in: Default: ``True`` - This sets the default for the use of the Theano garbage collector - for intermediate results. To use less memory, Theano frees the + This sets the default for the use of the Aesara garbage collector + for intermediate results. To use less memory, Aesara frees the intermediate results as soon as they are no longer needed. - Disabling Theano garbage collection allows Theano to reuse buffers + Disabling Aesara garbage collection allows Aesara to reuse buffers for intermediate results between function calls. This speeds up - Theano by no longer spending time reallocating space. This gives + Aesara by no longer spending time reallocating space. This gives significant speed up on functions with many ops that are fast to - execute, but this increases Theano's memory usage. + execute, but this increases Aesara's memory usage. .. note:: if :attr:`config.gpuarray__preallocate` is the default value or not disabled (-1), this is not useful anymore on the GPU. @@ -278,15 +278,15 @@ import theano and print the config variable, as in: Enable or disable parallel computation on the CPU with OpenMP. It is the default value used when creating an Op that supports it. - It is best to define it in .theanorc - or in the environment variable THEANO_FLAGS. + It is best to define it in ``.aesararc`` + or in the environment variable ``AESARA_FLAGS``. .. attribute:: openmp_elemwise_minsize Positive int value, default: 200000. - This specifies the vectors minimum size for which elemwise ops - use openmp, if openmp is enabled. + This specifies the vectors minimum size for which elemwise `Op`s + use OpenMP, if OpenMP is enabled. .. 
attribute:: cast_policy @@ -294,16 +294,16 @@ import theano and print the config variable, as in: Default: ``'custom'`` - This specifies how data types are implicitly figured out in Theano, e.g. for + This specifies how data types are implicitly figured out in Aesara, e.g. for constants or in the results of arithmetic operations. The 'custom' value corresponds to a set of custom rules originally used in - Theano (which can be partially customized, see e.g. the in-code help of + Aesara (which can be partially customized, see e.g. the in-code help of ``tensor.NumpyAutocaster``), and will be deprecated in the future. - The 'numpy+floatX' setting attempts to mimic the numpy casting rules, + The ``'numpy+floatX'`` setting attempts to mimic the numpy casting rules, although it prefers to use float32 numbers instead of float64 when - ``config.floatX`` is set to 'float32' and the user uses data that is not + ``config.floatX`` is set to ``'float32'`` and the user uses data that is not explicitly typed as float64 (e.g. regular Python floats). - Note that 'numpy+floatX' is not currently behaving exactly as planned (it + Note that ``'numpy+floatX'`` is not currently behaving exactly as planned (it is a work-in-progress), and thus you should consider it as experimental. At the moment it behaves differently from numpy in the following situations: @@ -311,9 +311,9 @@ import theano and print the config variable, as in: * Depending on the value of :attr:`config.int_division`, the resulting type of a division of integer types with the ``/`` operator may not match that of numpy. - * On mixed scalar / array operations, numpy tries to prevent the scalar + * On mixed scalar and array operations, NumPy tries to prevent the scalar from upcasting the array's type unless it is of a fundamentally - different type. Theano does not attempt to do the same at this point, + different type. Aesara does not attempt to do the same at this point, so you should be careful that scalars may upcast arrays when they would not when using numpy. This behavior should change in the near future. @@ -328,7 +328,7 @@ import theano and print the config variable, as in: ``y`` are of integer types (possibly unsigned). 'int' means an integer is returned (as in Python 2.X), but this behavior is deprecated. 'floatX' returns a number of type given by ``config.floatX``. 'raise' is the safest - choice (and will become default in a future release of Theano) and raises + choice (and will become default in a future release of Aesara) and raises an error when one tries to do such an operation, enforcing the use of the integer division operator (``//``) (if a float result is intended, either cast one of the arguments to a float, or use ``x.__truediv__(y)``). @@ -340,7 +340,7 @@ import theano and print the config variable, as in: Default: ``'Mode'`` - This sets the default compilation mode for theano functions. By default the + This sets the default compilation mode for aesara functions. By default the mode Mode is equivalent to FAST_RUN. See Config attribute linker and optimizer. .. attribute:: profile @@ -349,7 +349,7 @@ import theano and print the config variable, as in: Default: ``False`` - Do the vm/cvm linkers profile the execution time of Theano functions? + Do the vm/cvm linkers profile the execution time of Aesara functions? See :ref:`tut_profiling` for examples. @@ -359,7 +359,7 @@ import theano and print the config variable, as in: Default: ``False`` - Do the vm/cvm linkers profile the memory usage of Theano functions? 
+ Do the vm/cvm linkers profile the memory usage of Aesara functions? It only works when profile=True. .. attribute:: profile_optimizer @@ -368,7 +368,7 @@ import theano and print the config variable, as in: Default: ``False`` - Do the vm/cvm linkers profile the optimization phase when compiling a Theano function? + Do the vm/cvm linkers profile the optimization phase when compiling an Aesara function? It only works when profile=True. .. attribute:: config.profiling__n_apply @@ -424,7 +424,7 @@ import theano and print the config variable, as in: Default: ``False`` - Do we ignore the first call to a Theano function while profiling. + Do we ignore the first call to an Aesara function while profiling. .. attribute:: config.lib__amblibm @@ -446,7 +446,7 @@ import theano and print the config variable, as in: The value represents the start size (either in MB or the fraction of total GPU memory) of the memory pool. If more memory is needed, - Theano will try to obtain more, but this can cause memory + Aesara will try to obtain more, but this can cause memory fragmentation. A negative value will completely disable the allocation cache. @@ -461,8 +461,7 @@ import theano and print the config variable, as in: This could cause memory fragmentation. So if you have a memory error while using the cache, try to allocate more memory at - the start or disable it. If you try this, report your result - on `theano-dev`_. + the start or disable it. .. note:: @@ -538,10 +537,6 @@ import theano and print the config variable, as in: ('raise'), fall into the pdb debugger ('pdb') or ignore it ('ignore'). We suggest to never use 'ignore' except in tests. - If you encounter a warning, report it on `theano-dev`_. - -.. _theano-dev: http://groups.google.com/group/theano-dev - .. attribute:: assert_no_cpu_op String value: ``'ignore'`` or ``'warn'`` or ``'raise'`` or ``'pdb'`` @@ -572,18 +567,18 @@ import theano and print the config variable, as in: Default: ``'0.9'`` - When we fix a Theano bug that generated bad results under some - circumstances, we also make Theano raise a warning when it encounters + When we fix an Aesara bug that generated bad results under some + circumstances, we also make Aesara raise a warning when it encounters the same circumstances again. This helps to detect if said bug had affected your past experiments, as you only need to run your experiment again with the new version, and you do not have to - understand the Theano internal that triggered the bug. A better + understand the Aesara internal that triggered the bug. A better way to detect this will be implemented. See this `ticket `__. This flag allows new users not to get warnings about old bugs, that were - fixed before their first checkout of Theano. - You can set its value to the first version of Theano + fixed before their first checkout of Aesara. + You can set its value to the first version of Aesara that you used (probably 0.3 or higher) ``'None'`` means that all warnings will be displayed. @@ -591,15 +586,15 @@ import theano and print the config variable, as in: It is recommended that you put a version, so that you will see future warnings. - It is also recommended you put this into your .theanorc, so this setting + It is also recommended you put this into your ``.aesararc``, so this setting will always be used. This flag's value cannot be modified during the program execution. .. attribute:: base_compiledir - Default: On Windows: $LOCALAPPDATA\\Theano if $LOCALAPPDATA is defined, - otherwise and on other systems: ~/.theano. 
+ Default: On Windows: ``$LOCALAPPDATA\\Aesara`` if ``$LOCALAPPDATA`` is defined, + otherwise and on other systems: ``~/.aesara``. This directory stores the platform-dependent compilation directories. @@ -612,7 +607,7 @@ import theano and print the config variable, as in: This is a Python format string that specifies the subdirectory of ``config.base_compiledir`` in which to store platform-dependent compiled modules. To see a list of all available substitution keys, - run ``python -c "import theano; print(theano.config)"``, and look + run ``python -c "import aesara; print(aesara.config)"``, and look for compiledir_format. This flag's value cannot be modified during the program execution. @@ -776,12 +771,12 @@ import theano and print the config variable, as in: Default: Full path to g++ if g++ is present. Empty string otherwise. Indicates which C++ compiler to use. If empty, no C++ code is - compiled. Theano automatically detects whether g++ is present and + compiled. Aesara automatically detects whether g++ is present and disables C++ compilation when it is not. On darwin systems (Mac OS X), it preferably looks for clang++ and uses that if available. We print a warning if we detect that no compiler is present. It is - recommended to run with C++ compilation as Theano will be much + recommended to run with C++ compilation as Aesara will be much slower otherwise. This can be any compiler binary (full path or not) but things may @@ -927,11 +922,11 @@ import theano and print the config variable, as in: Default: ``'off'`` Setting this attribute to something other than ``'off'`` activates a - debugging mechanism, where Theano executes the graph on-the-fly, as it is + debugging mechanism, where Aesara executes the graph on-the-fly, as it is being built. This allows the user to spot errors early on (such as dimension mis-match), **before** optimizations are applied. - Theano will execute the graph using the Constants and/or shared variables + Aesara will execute the graph using the Constants and/or shared variables provided by the user. Purely symbolic variables (e.g. ``x = T.dmatrix()``) can be augmented with test values, by writing to their ``'tag.test_value'`` attribute (e.g. ``x.tag.test_value = numpy.random.rand(5, 4)``). @@ -946,24 +941,24 @@ import theano and print the config variable, as in: .. attribute:: compute_test_value_opt - As ``compute_test_value``, but it is the value used during Theano - optimization phase. Theano user's do not need to use this. This is - to help debug shape error in Theano optimization. + As ``compute_test_value``, but it is the value used during Aesara + optimization phase. Aesara user's do not need to use this. This is + to help debug shape error in Aesara optimization. .. attribute:: print_test_value Bool value, default: ``False`` - If ``'True'``, Theano will override the ``__str__`` method of its variables + If ``'True'``, Aesara will override the ``__str__`` method of its variables to also print the tag.test_value when this is available. .. attribute:: reoptimize_unpickled_function - Bool value, default: False (changed in master after Theano 0.7 release) + Bool value, default: False - Theano users can use the standard python pickle tools to save a compiled - theano function. When pickling, both graph before and after the optimization - are saved, including shared variables. When set to True, the graph is + Aesara users can use the standard python pickle tools to save a compiled + aesara function. 
When pickling, both graph before and after the optimization + are saved, including shared variables. When set to ``True``, the graph is reoptimized when being unpickled. Otherwise, skip the graph optimization and use directly the optimized graph. @@ -997,7 +992,7 @@ import theano and print the config variable, as in: Bool value, default: ``False`` If True, will remove the ``-O*`` parameter passed to g++. - This is useful to debug in gdb modules compiled by Theano. + This is useful to debug in gdb modules compiled by Aesara. The parameter ``-g`` is passed by default to g++. .. attribute:: config.cmodule__compilation_warning @@ -1017,7 +1012,7 @@ import theano and print the config variable, as in: Int value, default: ``60 * 60 * 24 * 24`` # 24 days In seconds. The time after which a compiled c module won't be - reused by Theano. Automatic deletion of those c module 7 days + reused by Aesara. Automatic deletion of those c module 7 days after that time. .. attribute:: config.cmodule__debug @@ -1036,8 +1031,8 @@ import theano and print the config variable, as in: Bool value, default: 0 - The number of user stack level to keep for variables during Theano - compilation. If higher then 0, will make us keep Theano internal + The number of user stack level to keep for variables during Aesara + compilation. If higher then 0, will make us keep Aesara internal stack trace. .. attribute:: config.metaopt__verbose diff --git a/doc/library/d3viz/index.ipynb b/doc/library/d3viz/index.ipynb index 7f02f5dd6a..6d18a8e947 100644 --- a/doc/library/d3viz/index.ipynb +++ b/doc/library/d3viz/index.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# d3viz: Interactive visualization of Theano compute graphs " + "# d3viz: Interactive visualization of Aesara compute graphs " ] }, { @@ -38,7 +38,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Like Theano’s [printing module](http://deeplearning.net/software/theano/library/printing.html), ``d3viz``\n", + "Like Aesara’s printing module, ``d3viz``\n", "requires [graphviz](http://www.graphviz.org) binary to be available." ] }, @@ -53,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`d3viz` extends Theano’s [printing module](http://deeplearning.net/software/theano/library/printing.html) to interactively visualize compute graphs. Instead of creating a static picture, it creates an HTML file, which can be opened with current web-browsers. `d3viz` allows\n", + "`d3viz` extends Aesara’s printing module to interactively visualize compute graphs. Instead of creating a static picture, it creates an HTML file, which can be opened with current web-browsers. 
`d3viz` allows\n", "\n", "* to zoom to different regions and to move graphs via drag and drop,\n", "* to position nodes both manually and automatically,\n", @@ -71,8 +71,8 @@ }, "outputs": [], "source": [ - "import theano as th\n", - "import theano.tensor as tt\n", + "import aesara as th\n", + "import aesara.tensor as tt\n", "import numpy as np" ] }, @@ -132,7 +132,7 @@ } ], "source": [ - "from theano.printing import pydotprint\n", + "from aesara.printing import pydotprint\n", "import os\n", "\n", "if not os.path.exists('examples'):\n", @@ -183,7 +183,7 @@ }, "outputs": [], "source": [ - "import theano.d3viz as d3v\n", + "import aesara.d3viz as d3v\n", "d3v.d3viz(predict, 'examples/mlp.html')" ] }, @@ -264,7 +264,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Theano allows [function profiling](http://deeplearning.net/software/theano/tutorial/profiling.html) via the `profile=True` flag. After at least one function call, the compute time of each node can be printed in text form with `debugprint`. However, analyzing complex graphs in this way can be cumbersome.\n", + "Aesara allows function profiling via the `profile=True` flag. After at least one function call, the compute time of each node can be printed in text form with `debugprint`. However, analyzing complex graphs in this way can be cumbersome.\n", "\n", "`d3viz` can visualize the same timing information graphically, and hence help to spot bottlenecks in the compute graph more easily! To begin with, we will redefine the `predict` function, this time by using `profile=True` flag. Afterwards, we capture the runtime on random data:" ] @@ -378,7 +378,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "An [OpFromGraph](http://deeplearning.net/software/theano/library/compile/opfromgraph.html) node defines a new operation, which can be called with different inputs at different places in the compute graph. Each `OpFromGraph` node defines a nested graph, which will be visualized accordingly by `d3viz`." + "An `OpFromGraph` node defines a new operation, which can be called with different inputs at different places in the compute graph. Each `OpFromGraph` node defines a nested graph, which will be visualized accordingly by `d3viz`." ] }, { diff --git a/doc/library/d3viz/index.txt b/doc/library/d3viz/index.txt index 545aa04ece..413a9842e5 100644 --- a/doc/library/d3viz/index.txt +++ b/doc/library/d3viz/index.txt @@ -1,12 +1,12 @@ .. _libdoc_d3viz: ========================================================================= -:mod:`d3viz` -- d3viz: Interactive visualization of Theano compute graphs +:mod:`d3viz` -- d3viz: Interactive visualization of Aesara compute graphs ========================================================================= -.. module:: theano.d3viz +.. module:: aesara.d3viz :platform: Unix, Windows - :synopsis: Allows to interactively visualize Theano compute graphs + :synopsis: Allows to interactively visualize Aesara compute graphs .. moduleauthor:: Christof Angermueller @@ -22,18 +22,15 @@ maintained, and it works both in Python 2.x and 3.x. Install it with pip:: pip install pydot-ng -Like Theano’s `printing module -`__, ``d3viz`` +Like Aesara’s printing module, ``d3viz`` requires `graphviz `__ binary to be available. Overview -------- -``d3viz`` extends Theano’s `printing -module `__ -to interactively visualize compute graphs. Instead of creating a static -picture, it creates an HTML file, which can be opened with current -web-browsers. 
``d3viz`` allows +``d3viz`` extends Aesara’s printing module to interactively visualize compute +graphs. Instead of creating a static picture, it creates an HTML file, which can +be opened with current web-browsers. ``d3viz`` allows - to zoom to different regions and to move graphs via drag and drop, - to position nodes both manually and automatically, @@ -53,8 +50,8 @@ hidden layer and a softmax output layer. .. code:: python - import theano as th - import theano.tensor as tt + import aesara as th + import aesara.tensor as tt import numpy as np ninputs = 1000 @@ -75,11 +72,11 @@ hidden layer and a softmax output layer. predict = th.function([x], y) The function ``predict`` outputs the probability of 10 classes. You can -visualize it with :py:func:`theano.printing.pydotprint` as follows: +visualize it with :py:func:`aesara.printing.pydotprint` as follows: .. code:: python - from theano.printing import pydotprint + from aesara.printing import pydotprint import os if not os.path.exists('examples'): @@ -104,12 +101,12 @@ visualize it with :py:func:`theano.printing.pydotprint` as follows: -To visualize it interactively, import :py:func:`theano.d3viz.d3viz.d3viz` from -the the :py:mod:`theano.d3viz.d3viz` module, which can be called as before: +To visualize it interactively, import :py:func:`aesara.d3viz.d3viz.d3viz` from +the the :py:mod:`aesara.d3viz.d3viz` module, which can be called as before: .. code:: python - import theano.d3viz as d3v + import aesara.d3viz as d3v d3v.d3viz(predict, 'examples/mlp.html') `Open visualization! <../../_static/mlp.html>`__ @@ -142,11 +139,9 @@ in /tmp/). Profiling --------- -Theano allows `function -profiling `__ -via the ``profile=True`` flag. After at least one function call, the -compute time of each node can be printed in text form with -``debugprint``. However, analyzing complex graphs in this way can be +Aesara allows function profiling via the ``profile=True`` flag. After at least +one function call, the compute time of each node can be printed in text form +with ``debugprint``. However, analyzing complex graphs in this way can be cumbersome. ``d3viz`` can visualize the same timing information graphically, and @@ -200,7 +195,7 @@ export graphs to different formats. .. image:: index_files/index_24_0.png -Here, we used the :py:class:`theano.d3viz.formatting.PyDotFormatter` class to +Here, we used the :py:class:`aesara.d3viz.formatting.PyDotFormatter` class to convert the compute graph into a ``pydot`` graph, and created a :download:`PNG ` and :download:`PDF ` file. You can find all output formats supported by Graphviz `here @@ -209,12 +204,9 @@ file. You can find all output formats supported by Graphviz `here OpFromGraph nodes ----------------- -An -`OpFromGraph `__ -node defines a new operation, which can be called with different inputs -at different places in the compute graph. Each ``OpFromGraph`` node -defines a nested graph, which will be visualized accordingly by -``d3viz``. +An ``OpFromGraph`` node defines a new operation, which can be called with +different inputs at different places in the compute graph. Each ``OpFromGraph`` +node defines a nested graph, which will be visualized accordingly by ``d3viz``. .. code:: python @@ -278,13 +270,13 @@ References d3viz module ------------ -.. automodule:: theano.d3viz.d3viz +.. automodule:: aesara.d3viz.d3viz :members: PyDotFormatter -------------- -.. autoclass:: theano.d3viz.formatting.PyDotFormatter +.. 
autoclass:: aesara.d3viz.formatting.PyDotFormatter :members: __call__ :special-members: :private-members: diff --git a/doc/library/gpuarray/ctc.txt b/doc/library/gpuarray/ctc.txt index 8eb144f47b..d5babf4d4d 100644 --- a/doc/library/gpuarray/ctc.txt +++ b/doc/library/gpuarray/ctc.txt @@ -1,7 +1,7 @@ .. _libdoc_gpuarray_ctc: ================================================================================ -:mod:`theano.gpuarray.ctc` -- Connectionist Temporal Classification (CTC) loss +:mod:`aesara.gpuarray.ctc` -- Connectionist Temporal Classification (CTC) loss ================================================================================ @@ -24,10 +24,10 @@ Unfortunately, Windows platforms are not yet supported by the underlying library. -.. module:: theano.gpuarray.ctc +.. module:: aesara.gpuarray.ctc :platform: Unix :synopsis: Connectionist temporal classification (CTC) loss Op, using the warp-ctc library .. moduleauthor:: `João Victor Risso `_ -.. autofunction:: theano.gpuarray.ctc.gpu_ctc -.. autoclass:: theano.gpuarray.ctc.GpuConnectionistTemporalClassification +.. autofunction:: aesara.gpuarray.ctc.gpu_ctc +.. autoclass:: aesara.gpuarray.ctc.GpuConnectionistTemporalClassification diff --git a/doc/library/gpuarray/dnn.txt b/doc/library/gpuarray/dnn.txt index 1103a6c1ee..4b1c5c7599 100644 --- a/doc/library/gpuarray/dnn.txt +++ b/doc/library/gpuarray/dnn.txt @@ -1,7 +1,7 @@ .. _libdoc_gpuarray_dnn: =========================================== -:mod:`theano.gpuarray.dnn` -- cuDNN +:mod:`aesara.gpuarray.dnn` -- cuDNN =========================================== .. moduleauthor:: LISA @@ -34,18 +34,16 @@ There are at least three possible ways of doing so: - And as a third way, also on Linux, you can copy the ``*.h`` files to ``/usr/include`` and the ``*.so*`` files to ``/lib64``. -By default, Theano will detect if it can use cuDNN. If so, it will use -it. If not, Theano optimizations will not introduce cuDNN ops. So -Theano will still work if the user did not introduce them manually. +By default, Aesara will detect if it can use cuDNN. If so, it will use +it. If not, Aesara optimizations will not introduce cuDNN ops. So +Aesara will still work if the user did not introduce them manually. -To get an error if Theano can not use cuDNN, use this Theano flag: +To get an error if Aesara can not use cuDNN, use this Aesara flag: ``optimizer_including=cudnn``. .. note:: - cuDNN v5.1 is supported in Theano master version. So it dropped cuDNN v3 support. - Theano 0.8.0 and 0.8.1 support only cuDNN v3 and v4. - Theano 0.8.2 will support only v4 and v5. + cuDNN v5.1 is supported in Aesara master version. So it dropped cuDNN v3 support. .. note:: @@ -53,8 +51,8 @@ To get an error if Theano can not use cuDNN, use this Theano flag: it is possible to use heuristics to automatically choose a convolution implementation well suited to the parameters of the convolution. - The Theano flag ``dnn__conv__algo_fwd`` allows to specify the cuDNN - convolution implementation that Theano should use for forward convolutions. + The Aesara flag ``dnn__conv__algo_fwd`` allows to specify the cuDNN + convolution implementation that Aesara should use for forward convolutions. Possible values include : * ``small`` (default) : use a convolution implementation with small memory @@ -76,8 +74,8 @@ To get an error if Theano can not use cuDNN, use this Theano flag: implementation selected every time the shapes of the inputs and kernels don't match the shapes from the last execution. 
- The Theano flag ``dnn.conv.algo_bwd`` allows to specify the cuDNN - convolution implementation that Theano should use for gradient convolutions. + The Aesara flag ``dnn.conv.algo_bwd`` allows to specify the cuDNN + convolution implementation that Aesara should use for gradient convolutions. Possible values include : * ``none`` (default) : use the default non-deterministic convolution @@ -118,7 +116,7 @@ To get an error if Theano can not use cuDNN, use this Theano flag: input of the convolution. They are also used sometimes in the forward pass, when they give a speed up. - The Theano flag ``dnn.conv.algo_bwd`` can be use to force the use of a + The Aesara flag ``dnn.conv.algo_bwd`` can be use to force the use of a slower but deterministic convolution implementation. .. note:: @@ -133,21 +131,21 @@ To get an error if Theano can not use cuDNN, use this Theano flag: - Convolution: - - :func:`theano.gpuarray.dnn.dnn_conv`, :func:`theano.gpuarray.dnn.dnn_conv3d`. - - :func:`theano.gpuarray.dnn.dnn_gradweight`, :func:`theano.gpuarray.dnn.dnn_gradweight3d`. - - :func:`theano.gpuarray.dnn.dnn_gradinput`, :func:`theano.gpuarray.dnn.dnn_gradinput3d`. + - :func:`aesara.gpuarray.dnn.dnn_conv`, :func:`aesara.gpuarray.dnn.dnn_conv3d`. + - :func:`aesara.gpuarray.dnn.dnn_gradweight`, :func:`aesara.gpuarray.dnn.dnn_gradweight3d`. + - :func:`aesara.gpuarray.dnn.dnn_gradinput`, :func:`aesara.gpuarray.dnn.dnn_gradinput3d`. - Pooling: - - :func:`theano.gpuarray.dnn.dnn_pool`. + - :func:`aesara.gpuarray.dnn.dnn_pool`. - Batch Normalization: - - :func:`theano.gpuarray.dnn.dnn_batch_normalization_train` - - :func:`theano.gpuarray.dnn.dnn_batch_normalization_test`. + - :func:`aesara.gpuarray.dnn.dnn_batch_normalization_train` + - :func:`aesara.gpuarray.dnn.dnn_batch_normalization_test`. - RNN: - - :class:`theano.gpuarray.dnn.RNNBlock` + - :class:`aesara.gpuarray.dnn.RNNBlock` - Softmax: - You can manually use the op :class:`GpuDnnSoftmax - ` to use its extra feature. + ` to use its extra feature. - Spatial Transformer: - - :func:`theano.gpuarray.dnn.dnn_spatialtf`. + - :func:`aesara.gpuarray.dnn.dnn_spatialtf`. cuDNN RNN Example @@ -172,13 +170,13 @@ To clarify the rest of the code we define some variables to hold sizes. .. code-block:: python - from theano.tensor.type import tensor3 + from aesara.tensor.type import tensor3 X = tensor3('X') Y = tensor3('Y') h0 = tensor3('h0') -We also define some Theano variables to work with. Here `X` is input, +We also define some Aesara variables to work with. Here `X` is input, `Y` is output (as in expected output) and `h0` is the initial state for the recurrent inputs. @@ -186,7 +184,7 @@ for the recurrent inputs. rnnb = dnn.RNNBlock(dtype, hidden_dim, depth, 'gru') -This defines an RNNBlock. This is a departure from usual Theano +This defines an RNNBlock. This is a departure from usual Aesara operations in that it has the structure of a layer more than a separate operation. This is constrained by the underlying API. @@ -194,7 +192,7 @@ separate operation. This is constrained by the underlying API. psize = rnnb.get_param_size([batch_size, input_dim]) params_cudnn = gpuarray_shared_constructor( - np.zeros((psize,), dtype=theano.config.floatX)) + np.zeros((psize,), dtype=aesara.config.floatX)) Here we allocate space for the trainable parameters of the RNN. The first function tells us how many elements we will need to store the @@ -231,16 +229,16 @@ vector). 
y, hy = rnnb.apply(params_cudnn, X, h0) -This is more akin to an op in Theano in that it will apply the RNN +This is more akin to an op in Aesara in that it will apply the RNN operation to a set of symbolic inputs and return symbolic outputs. `y` is the output, `hy` is the final state for the recurrent inputs. After this, the gradient works as usual so you can treat the returned -symbolic outputs as normal Theano symbolic variables. +symbolic outputs as normal Aesara symbolic variables. List of Implemented Operations ============================== -.. automodule:: theano.gpuarray.dnn +.. automodule:: aesara.gpuarray.dnn :members: diff --git a/doc/library/gpuarray/extra.txt b/doc/library/gpuarray/extra.txt index 7b8a81771e..071d5c74d7 100644 --- a/doc/library/gpuarray/extra.txt +++ b/doc/library/gpuarray/extra.txt @@ -7,17 +7,17 @@ Utility functions Optimization ------------ -.. automodule:: theano.gpuarray.opt_util +.. automodule:: aesara.gpuarray.opt_util :members: Kernel generation ----------------- -.. automodule:: theano.gpuarray.kernel_codegen +.. automodule:: aesara.gpuarray.kernel_codegen :members: float16 ------- -.. automodule:: theano.gpuarray.fp16_help +.. automodule:: aesara.gpuarray.fp16_help :members: diff --git a/doc/library/gpuarray/fft.txt b/doc/library/gpuarray/fft.txt index 7b5c67a874..effcbae31f 100644 --- a/doc/library/gpuarray/fft.txt +++ b/doc/library/gpuarray/fft.txt @@ -1,7 +1,7 @@ .. _libdoc_gpuarray_fft: ===================================================== -:mod:`theano.gpuarray.fft` -- Fast Fourier Transforms +:mod:`aesara.gpuarray.fft` -- Fast Fourier Transforms ===================================================== Performs Fast Fourier Transforms (FFT) on the GPU. @@ -15,27 +15,27 @@ FFT gradients are implemented as the opposite Fourier transform of the output gr .. warning :: The real and imaginary parts of the Fourier domain arrays are stored as a pair of float32 - arrays, emulating complex64. Since theano has limited support for complex + arrays, emulating complex64. Since aesara has limited support for complex number operations, care must be taken to manually implement operations such as gradients. -.. automodule:: theano.gpuarray.fft +.. automodule:: aesara.gpuarray.fft :members: curfft, cuirfft For example, the code below performs the real input FFT of a box function, which is a sinc function. The absolute value is plotted, since the phase oscillates due to the box function being -shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must be used. +shifted to the middle of the array. The Aesara flag ``device=cuda{0,1...}`` must be used. .. testcode:: import numpy as np - import theano - import theano.tensor as tt - from theano.gpuarray import fft + import aesara + import aesara.tensor as tt + from aesara.gpuarray import fft x = tt.matrix('x', dtype='float32') rfft = fft.curfft(x, norm='ortho') - f_rfft = theano.function([x], rfft) + f_rfft = aesara.function([x], rfft) N = 1024 box = np.zeros((1, N), dtype='float32') diff --git a/doc/library/gpuarray/index.txt b/doc/library/gpuarray/index.txt index b197d88b86..181a7f0dde 100644 --- a/doc/library/gpuarray/index.txt +++ b/doc/library/gpuarray/index.txt @@ -5,7 +5,7 @@ :mod:`gpuarray` -- The (new) GPU backend ======================================================= -.. module:: theano.gpuarray +.. module:: aesara.gpuarray :platform: Unix, Windows :synopsis: Code for GPU programming (new) .. 
moduleauthor:: MILA diff --git a/doc/library/gpuarray/linalg.txt b/doc/library/gpuarray/linalg.txt index b2471a467c..9b750d6d5e 100644 --- a/doc/library/gpuarray/linalg.txt +++ b/doc/library/gpuarray/linalg.txt @@ -1,13 +1,13 @@ .. _libdoc_gpuarray_linalg: ========================================================= -:mod:`theano.gpuarray.linalg` -- Linear algebra operation +:mod:`aesara.gpuarray.linalg` -- Linear algebra operation ========================================================= .. warning:: - Some operation need Magma to be installed and the Theano flags + Some operation need Magma to be installed and the Aesara flags :attr:`config.magma__enabled=True` to be activated. See also the flags :attr:`config.magma__include_path` and :attr:`config.magma__library_path`. @@ -15,5 +15,5 @@ Linalg Op ========= -.. automodule:: theano.gpuarray.linalg +.. automodule:: aesara.gpuarray.linalg :members: diff --git a/doc/library/gpuarray/op.txt b/doc/library/gpuarray/op.txt index 9546479589..16c00fc9ef 100644 --- a/doc/library/gpuarray/op.txt +++ b/doc/library/gpuarray/op.txt @@ -6,39 +6,39 @@ List of gpuarray Ops implemented .. moduleauthor:: LISA -Normally you should not call directly those Ops! Theano should +Normally you should not call directly those Ops! Aesara should automatically transform CPU ops to their GPU equivalent. So this list is just useful to let people know what is implemented on the GPU. Basic Op ======== -.. automodule:: theano.gpuarray.basic_ops +.. automodule:: aesara.gpuarray.basic_ops :members: Blas Op ======= -.. automodule:: theano.gpuarray.blas +.. automodule:: aesara.gpuarray.blas :members: Elemwise Op =========== -.. automodule:: theano.gpuarray.elemwise +.. automodule:: aesara.gpuarray.elemwise :members: Subtensor Op ============ -.. automodule:: theano.gpuarray.subtensor +.. automodule:: aesara.gpuarray.subtensor :members: Nnet Op ======= -.. automodule:: theano.gpuarray.nnet +.. automodule:: aesara.gpuarray.nnet :members: -.. automodule:: theano.gpuarray.neighbours +.. automodule:: aesara.gpuarray.neighbours :members: diff --git a/doc/library/gpuarray/type.txt b/doc/library/gpuarray/type.txt index 2dc29e1079..12149d5ba0 100644 --- a/doc/library/gpuarray/type.txt +++ b/doc/library/gpuarray/type.txt @@ -1,8 +1,8 @@ .. _libdoc_gpuarray_type: =================================================== -:mod:`theano.gpuarray.type` -- Type classes +:mod:`aesara.gpuarray.type` -- Type classes =================================================== -.. automodule:: theano.gpuarray.type +.. automodule:: aesara.gpuarray.type :members: diff --git a/doc/library/gradient.txt b/doc/library/gradient.txt index 3e76110f32..5b92005577 100644 --- a/doc/library/gradient.txt +++ b/doc/library/gradient.txt @@ -11,7 +11,7 @@ .. testsetup:: * - from theano.gradient import * + from aesara.gradient import * Symbolic gradient is usually computed from :func:`gradient.grad`, which offers a more convenient syntax for the common case of wanting the gradient of some @@ -23,7 +23,7 @@ awkward to use when :func:`gradient.grad` can do the job. Gradient related functions ========================== -.. automodule:: theano.gradient +.. automodule:: aesara.gradient :members: .. _R_op_list: diff --git a/doc/library/graph/fgraph.txt b/doc/library/graph/fgraph.txt index e99266ab4b..d07699c79a 100644 --- a/doc/library/graph/fgraph.txt +++ b/doc/library/graph/fgraph.txt @@ -5,9 +5,9 @@ :mod:`fg` -- Graph Container [doc TODO] ================================================ -.. module:: theano.graph.fg +.. 
module:: aesara.graph.fg :platform: Unix, Windows - :synopsis: Theano Internals + :synopsis: Aesara Internals .. moduleauthor:: LISA @@ -16,7 +16,7 @@ FunctionGraph ------------- -.. autoclass:: theano.graph.fg.FunctionGraph +.. autoclass:: aesara.graph.fg.FunctionGraph :members: ***TODO*** @@ -33,7 +33,7 @@ FunctionGraph FunctionGraph Features ---------------------- -.. autoclass:: theano.graph.toolbox.Feature +.. autoclass:: aesara.graph.toolbox.Feature :members: .. _libdoc_graph_fgraphfeaturelist: diff --git a/doc/library/graph/graph.txt b/doc/library/graph/graph.txt index fc32ddde48..f59cac0356 100644 --- a/doc/library/graph/graph.txt +++ b/doc/library/graph/graph.txt @@ -1,14 +1,14 @@ .. _libdoc_graph_graph: ============================================== -:mod:`graph` -- Interface for the Theano graph +:mod:`graph` -- Interface for the Aesara graph ============================================== --------- Reference --------- -.. automodule:: theano.graph.basic +.. automodule:: aesara.graph.basic :platform: Unix, Windows :synopsis: Interface for types of symbolic variables :members: diff --git a/doc/library/graph/op.txt b/doc/library/graph/op.txt index f68b9ab325..892f31eb2c 100644 --- a/doc/library/graph/op.txt +++ b/doc/library/graph/op.txt @@ -5,7 +5,7 @@ :mod:`graph` -- Objects and functions for computational graphs ================================================ -.. automodule:: theano.graph.op +.. automodule:: aesara.graph.op :platform: Unix, Windows :synopsis: Interface for types of symbolic variables :members: diff --git a/doc/library/graph/params_type.txt b/doc/library/graph/params_type.txt index c2cf3ca84a..d43cdae814 100644 --- a/doc/library/graph/params_type.txt +++ b/doc/library/graph/params_type.txt @@ -1,14 +1,14 @@ .. _libdoc_graph_params_type: ============================================================ -:mod:`theano.graph.params_type` -- Wrapper class for op params +:mod:`aesara.graph.params_type` -- Wrapper class for op params ============================================================ --------- Reference --------- -.. automodule:: theano.graph.params_type +.. automodule:: aesara.graph.params_type :platform: Unix, Windows :synopsis: Wrapper class for op params :members: diff --git a/doc/library/graph/toolbox.txt b/doc/library/graph/toolbox.txt index 053c325fc5..a348b6a998 100644 --- a/doc/library/graph/toolbox.txt +++ b/doc/library/graph/toolbox.txt @@ -4,9 +4,9 @@ :mod:`toolbox` -- [doc TODO] ================================================ -.. module:: theano.graph.toolbox +.. module:: aesara.graph.toolbox :platform: Unix, Windows - :synopsis: Theano Internals + :synopsis: Aesara Internals .. moduleauthor:: LISA Guide diff --git a/doc/library/graph/type.txt b/doc/library/graph/type.txt index 513c16e44a..59f18d54f4 100644 --- a/doc/library/graph/type.txt +++ b/doc/library/graph/type.txt @@ -8,7 +8,7 @@ Reference --------- -.. automodule:: theano.graph.type +.. automodule:: aesara.graph.type :platform: Unix, Windows :synopsis: Interface for types of symbolic variables :members: diff --git a/doc/library/graph/utils.txt b/doc/library/graph/utils.txt index ebe36e3fb3..44ddea0d82 100644 --- a/doc/library/graph/utils.txt +++ b/doc/library/graph/utils.txt @@ -6,13 +6,13 @@ .. testsetup:: * - from theano.graph.utils import * + from aesara.graph.utils import * --------- Reference --------- -.. automodule:: theano.graph.utils +.. 
automodule:: aesara.graph.utils :platform: Unix, Windows :synopsis: Utilities functions operating on the graph :members: diff --git a/doc/library/index.txt b/doc/library/index.txt index 094fcb8885..4539d11f6b 100644 --- a/doc/library/index.txt +++ b/doc/library/index.txt @@ -6,7 +6,7 @@ API Documentation ================= -This documentation covers Theano module-wise. This is suited to finding the +This documentation covers Aesara module-wise. This is suited to finding the Types and Ops that you can use to build and compile expression graphs. .. toctree:: @@ -32,23 +32,23 @@ Types and Ops that you can use to build and compile expression graphs. There are also some top-level imports that you might find more convenient: -.. module:: theano +.. module:: aesara :platform: Unix, Windows - :synopsis: Theano top-level import + :synopsis: Aesara top-level import .. moduleauthor:: LISA .. function:: function(...) - Alias for :func:`theano.compile.function.function` + Alias for :func:`aesara.compile.function.function` .. function:: function_dump(...) - Alias for :func:`theano.compile.function.function_dump` + Alias for :func:`aesara.compile.function.function_dump` .. function:: shared(...) - Alias for :func:`theano.compile.sharedvalue.shared` + Alias for :func:`aesara.compile.sharedvalue.shared` .. class:: In @@ -58,4 +58,4 @@ There are also some top-level imports that you might find more convenient: Works like :func:`tensor.dot` for both sparse and dense matrix products -.. autofunction:: theano.clone_replace +.. autofunction:: aesara.clone_replace diff --git a/doc/library/misc/pkl_utils.txt b/doc/library/misc/pkl_utils.txt index 8b3b057210..17baf94521 100644 --- a/doc/library/misc/pkl_utils.txt +++ b/doc/library/misc/pkl_utils.txt @@ -7,13 +7,13 @@ .. testsetup:: * - from theano.misc.pkl_utils import * + from aesara.misc.pkl_utils import * -.. autofunction:: theano.misc.pkl_utils.dump +.. autofunction:: aesara.misc.pkl_utils.dump -.. autofunction:: theano.misc.pkl_utils.load +.. autofunction:: aesara.misc.pkl_utils.load -.. autoclass:: theano.misc.pkl_utils.StripPickler +.. autoclass:: aesara.misc.pkl_utils.StripPickler .. seealso:: diff --git a/doc/library/printing.txt b/doc/library/printing.txt index 7b9b386781..ab52b44e77 100644 --- a/doc/library/printing.txt +++ b/doc/library/printing.txt @@ -4,14 +4,14 @@ :mod:`printing` -- Graph Printing and Symbolic Print Statement =============================================================== -.. module:: theano.printing +.. module:: aesara.printing :platform: Unix, Windows :synopsis: Provides the Print Op and graph-printing routines. .. moduleauthor:: LISA .. testsetup:: - import theano + import aesara Guide ====== @@ -20,10 +20,10 @@ Printing during execution ------------------------- Intermediate values in a computation cannot be printed in -the normal python way with the print statement, because Theano has no *statements*. +the normal python way with the print statement, because Aesara has no *statements*. Instead there is the :class:`Print` Op. ->>> from theano import tensor as tt, function, printing +>>> from aesara import tensor as tt, function, printing >>> x = tt.dvector() >>> hello_world_op = printing.Print('hello world') >>> printed_x = hello_world_op(x) @@ -43,16 +43,16 @@ then `a` will print before `b`. 
Printing graphs --------------- -Theano provides two functions (:func:`theano.pp` and -:func:`theano.printing.debugprint`) to print a graph to the terminal before or after +Aesara provides two functions (:func:`aesara.pp` and +:func:`aesara.printing.debugprint`) to print a graph to the terminal before or after compilation. These two functions print expression graphs in different ways: :func:`pp` is more compact and math-like, :func:`debugprint` is more verbose. -Theano also provides :func:`theano.printing.pydotprint` that creates a png image of the function. +Aesara also provides :func:`aesara.printing.pydotprint` that creates a png image of the function. -1) The first is :func:`theano.pp`. +1) The first is :func:`aesara.pp`. ->>> from theano import pp, grad, ->>> from theano import tensor as tt +>>> from aesara import pp, grad, +>>> from aesara import tensor as tt >>> x = tt.dscalar('x') >>> y = x ** 2 >>> gy = grad(y, x) @@ -79,9 +79,9 @@ iteration number or other kinds of information in the name. automatic DimShuffles are not shown. -2) The second function to print a graph is :func:`theano.printing.debugprint` +2) The second function to print a graph is :func:`aesara.printing.debugprint` ->>> theano.printing.debugprint(f.maker.fgraph.outputs[0]) # doctest: +NORMALIZE_WHITESPACE +>>> aesara.printing.debugprint(f.maker.fgraph.outputs[0]) # doctest: +NORMALIZE_WHITESPACE Elemwise{mul,no_inplace} [id A] '' |TensorConstant{2.0} [id B] |x [id C] @@ -106,7 +106,7 @@ happen when that Variable has already been printed. Where else has it been printed? Look for debugprint identifier using the Find feature of your text editor. ->>> theano.printing.debugprint(gy) # doctest: +NORMALIZE_WHITESPACE +>>> aesara.printing.debugprint(gy) # doctest: +NORMALIZE_WHITESPACE Elemwise{mul} [id A] '' |Elemwise{mul} [id B] '' | |Elemwise{second,no_inplace} [id C] '' @@ -122,7 +122,7 @@ Elemwise{mul} [id A] '' |InplaceDimShuffle{} [id J] '' |TensorConstant{1} [id K] ->>> theano.printing.debugprint(gy, depth=2) # doctest: +NORMALIZE_WHITESPACE +>>> aesara.printing.debugprint(gy, depth=2) # doctest: +NORMALIZE_WHITESPACE Elemwise{mul} [id A] '' |Elemwise{mul} [id B] '' |Elemwise{pow} [id C] '' @@ -133,7 +133,7 @@ shown. -3) The function :func:`theano.printing.pydotprint` will print a compiled theano function to a png file. +3) The function :func:`aesara.printing.pydotprint` will print a compiled aesara function to a png file. In the image, Apply nodes (the applications of ops) are shown as ellipses and variables are shown as boxes. @@ -180,16 +180,16 @@ Reference :param x: any symbolic variable :returns: symbolic identity(x) - When you use the return-value from this function in a theano function, + When you use the return-value from this function in an Aesara function, running the function will print the value that `x` takes in the graph. -.. autofunction:: theano.printing.debugprint +.. autofunction:: aesara.printing.debugprint -.. function:: theano.pp(*args) +.. function:: aesara.pp(*args) - Just a shortcut to :func:`theano.printing.pp` + Just a shortcut to :func:`aesara.printing.pp` -.. autofunction:: theano.printing.pp(*args) +.. autofunction:: aesara.printing.pp(*args) -.. autofunction:: theano.printing.pydotprint +.. autofunction:: aesara.printing.pydotprint diff --git a/doc/library/sandbox/linalg.txt b/doc/library/sandbox/linalg.txt index c4e1559a38..f4fa395ccb 100644 --- a/doc/library/sandbox/linalg.txt +++ b/doc/library/sandbox/linalg.txt @@ -1,5 +1,5 @@ -.. ../../../../theano/sandbox/linalg/ops.py -.. 
../../../../theano/sandbox/linalg +.. ../../../../aesara/sandbox/linalg/ops.py +.. ../../../../aesara/sandbox/linalg .. _libdoc_sandbox_linalg: @@ -15,5 +15,5 @@ API === -.. automodule:: theano.sandbox.linalg.ops +.. automodule:: aesara.sandbox.linalg.ops :members: diff --git a/doc/library/sandbox/rng_mrg.txt b/doc/library/sandbox/rng_mrg.txt index 109d337d77..e4162cc3d9 100644 --- a/doc/library/sandbox/rng_mrg.txt +++ b/doc/library/sandbox/rng_mrg.txt @@ -12,5 +12,5 @@ API === -.. automodule:: theano.sandbox.rng_mrg +.. automodule:: aesara.sandbox.rng_mrg :members: diff --git a/doc/library/scan.txt b/doc/library/scan.txt index 2042b8145e..017daceaf1 100644 --- a/doc/library/scan.txt +++ b/doc/library/scan.txt @@ -2,7 +2,7 @@ .. _lib_scan: ================================ -:mod:`scan` -- Looping in Theano +:mod:`scan` -- Looping in Aesara ================================ @@ -10,7 +10,7 @@ Guide ===== The scan functions provides the basic functionality needed to do loops -in Theano. Scan comes with many whistles and bells, which we will introduce +in Aesara. Scan comes with many whistles and bells, which we will introduce by way of examples. @@ -33,18 +33,18 @@ the unchanging variable ``A``. Unchanging variables are passed to scan as ``non_sequences``. Initialization occurs in ``outputs_info``, and the accumulation happens automatically. -The equivalent Theano code would be: +The equivalent Aesara code would be: .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt k = tt.iscalar("k") A = tt.vector("A") # Symbolic description of the result - result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A, + result, updates = aesara.scan(fn=lambda prior_result, A: prior_result * A, outputs_info=T.ones_like(A), non_sequences=A, n_steps=k) @@ -55,7 +55,7 @@ The equivalent Theano code would be: final_result = result[-1] # compiled function that returns A**k - power = theano.function(inputs=[A,k], outputs=final_result, updates=updates) + power = aesara.function(inputs=[A,k], outputs=final_result, updates=updates) print(power(range(10),2)) print(power(range(10),4)) @@ -102,21 +102,21 @@ from a list of its coefficients: import numpy - coefficients = theano.tensor.vector("coefficients") + coefficients = aesara.tensor.vector("coefficients") x = tt.scalar("x") max_coefficients_supported = 10000 # Generate the components of the polynomial - components, updates = theano.scan(fn=lambda coefficient, power, free_variable: coefficient * (free_variable ** power), + components, updates = aesara.scan(fn=lambda coefficient, power, free_variable: coefficient * (free_variable ** power), outputs_info=None, - sequences=[coefficients, theano.tensor.arange(max_coefficients_supported)], + sequences=[coefficients, aesara.tensor.arange(max_coefficients_supported)], non_sequences=x) # Sum them up polynomial = components.sum() # Compile a function - calculate_polynomial = theano.function(inputs=[coefficients, x], outputs=polynomial) + calculate_polynomial = aesara.function(inputs=[coefficients, x], outputs=polynomial) # Test test_coefficients = numpy.asarray([1, 0, 2], dtype=numpy.float32) @@ -143,7 +143,7 @@ The general order of function parameters to ``fn`` is:: sequences (if any), prior result(s) (if needed), non-sequences (if any) Third, there's a handy trick used to simulate python's ``enumerate``: simply include -``theano.tensor.arange`` to the sequences. +``aesara.tensor.arange`` to the sequences. 
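To make the ``arange``-as-``enumerate`` trick concrete, here is a small illustrative sketch (not part of the original documentation; the variable names are hypothetical) that multiplies every element of a vector by its position:

    import numpy as np
    import aesara
    import aesara.tensor as tt

    v = tt.vector("v")
    # Scanning over an arange of the same length as ``v`` pairs each element
    # with its index, mimicking Python's ``enumerate``.
    scaled, _ = aesara.scan(
        fn=lambda index, value: index * value,
        sequences=[tt.arange(v.shape[0]), v],
    )
    scale_by_index = aesara.function([v], scaled)
    print(scale_by_index(np.arange(5, dtype=aesara.config.floatX)))  # 0, 1, 4, 9, 16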
Fourth, given multiple sequences of uneven lengths, scan will truncate to the shortest of them. This makes it safe to pass a very long arange, which we need to do for generality, since @@ -163,8 +163,8 @@ downcast** of the latter. import numpy as np - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt up_to = tt.iscalar("up_to") @@ -179,10 +179,10 @@ downcast** of the latter. # outputs_info = tt.as_tensor_variable(0) outputs_info = tt.as_tensor_variable(np.asarray(0, seq.dtype)) - scan_result, scan_updates = theano.scan(fn=accumulate_by_adding, + scan_result, scan_updates = aesara.scan(fn=accumulate_by_adding, outputs_info=outputs_info, sequences=seq) - triangular_sequence = theano.function(inputs=[up_to], outputs=scan_result) + triangular_sequence = aesara.function(inputs=[up_to], outputs=scan_result) # test some_num = 15 @@ -215,12 +215,12 @@ with all values set to zero except at the provided array indices. zeros_subtensor = zeros[a_location[0], a_location[1]] return tt.set_subtensor(zeros_subtensor, a_value) - result, updates = theano.scan(fn=set_value_at_position, + result, updates = aesara.scan(fn=set_value_at_position, outputs_info=None, sequences=[location, values], non_sequences=output_model) - assign_values_at_positions = theano.function(inputs=[location, values, output_model], outputs=result) + assign_values_at_positions = aesara.function(inputs=[location, values, output_model], outputs=result) # test test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32) @@ -242,7 +242,7 @@ with all values set to zero except at the provided array indices. [ 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0.]]] -This demonstrates that you can introduce new Theano variables into a scan function. +This demonstrates that you can introduce new Aesara variables into a scan function. .. _lib_scan_shared_variables: @@ -256,7 +256,7 @@ the following: .. testsetup:: scan1 - import theano + import aesara import numpy W_values = numpy.random.random((2, 2)) bvis_values = numpy.random.random((2,)) @@ -264,29 +264,29 @@ the following: .. testcode:: scan1 - import theano - from theano import tensor as tt + import aesara + from aesara import tensor as tt - W = theano.shared(W_values) # we assume that ``W_values`` contains the + W = aesara.shared(W_values) # we assume that ``W_values`` contains the # initial values of your weight matrix - bvis = theano.shared(bvis_values) - bhid = theano.shared(bhid_values) + bvis = aesara.shared(bvis_values) + bhid = aesara.shared(bhid_values) - trng = theano.tensor.random.utils.RandomStream(1234) + trng = aesara.tensor.random.utils.RandomStream(1234) def OneStep(vsample) : - hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid) + hmean = tt.nnet.sigmoid(aesara.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis) + vmean = tt.nnet.sigmoid(aesara.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, - dtype=theano.config.floatX) + dtype=aesara.config.floatX) - sample = theano.tensor.vector() + sample = aesara.tensor.vector() - values, updates = theano.scan(OneStep, outputs_info=sample, n_steps=10) + values, updates = aesara.scan(OneStep, outputs_info=sample, n_steps=10) - gibbs10 = theano.function([sample], values[-1], updates=updates) + gibbs10 = aesara.function([sample], values[-1], updates=updates) The first, and probably most crucial observation is that the updates @@ -299,12 +299,12 @@ afterwards. Look at this example : .. 
testsetup:: scan2 - import theano + import aesara .. testcode:: scan2 - a = theano.shared(1) - values, updates = theano.scan(lambda: {a: a+1}, n_steps=10) + a = aesara.shared(1) + values, updates = aesara.scan(lambda: {a: a+1}, n_steps=10) In this case the lambda expression does not require any input parameters and returns an update dictionary which tells how ``a`` should be updated @@ -314,7 +314,7 @@ after each step of scan. If we write : b = a + 1 c = updates[a] + 1 - f = theano.function([], [b, c], updates=updates) + f = aesara.function([], [b, c], updates=updates) print(b) print(c) @@ -348,31 +348,31 @@ updated: .. testcode:: scan1 - W = theano.shared(W_values) # we assume that ``W_values`` contains the + W = aesara.shared(W_values) # we assume that ``W_values`` contains the # initial values of your weight matrix - bvis = theano.shared(bvis_values) - bhid = theano.shared(bhid_values) + bvis = aesara.shared(bvis_values) + bhid = aesara.shared(bhid_values) - trng = theano.tensor.random.utils.RandomStream(1234) + trng = aesara.tensor.random.utils.RandomStream(1234) # OneStep, with explicit use of the shared variables (W, bvis, bhid) def OneStep(vsample, W, bvis, bhid): - hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid) + hmean = tt.nnet.sigmoid(aesara.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis) + vmean = tt.nnet.sigmoid(aesara.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, - dtype=theano.config.floatX) + dtype=aesara.config.floatX) - sample = theano.tensor.vector() + sample = aesara.tensor.vector() # The new scan, with the shared variables passed as non_sequences - values, updates = theano.scan(fn=OneStep, + values, updates = aesara.scan(fn=OneStep, outputs_info=sample, non_sequences=[W, bvis, bhid], n_steps=10) - gibbs10 = theano.function([sample], values[-1], updates=updates) + gibbs10 = aesara.function([sample], values[-1], updates=updates) .. _lib_scan_strict: @@ -394,14 +394,14 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the # Same OneStep as in original example. def OneStep(vsample) : - hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid) + hmean = tt.nnet.sigmoid(aesara.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis) + vmean = tt.nnet.sigmoid(aesara.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, - dtype=theano.config.floatX) + dtype=aesara.config.floatX) # The new scan, adding strict=True to the original call. - values, updates = theano.scan(OneStep, + values, updates = aesara.scan(OneStep, outputs_info=sample, n_steps=10, strict=True) @@ -412,7 +412,7 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the ... MissingInputError: An input of the graph, used to compute DimShuffle{1,0}(), was not provided and - not given a value.Use the Theano flag exception_verbosity='high',for + not given a value.Use the Aesara flag exception_verbosity='high',for more information on this error. 
The error indicates that ``OneStep`` relies on variables that are not passed @@ -423,15 +423,15 @@ variables passed explicitly to ``OneStep`` and to scan: # OneStep, with explicit use of the shared variables (W, bvis, bhid) def OneStep(vsample, W, bvis, bhid) : - hmean = tt.nnet.sigmoid(theano.dot(vsample, W) + bhid) + hmean = tt.nnet.sigmoid(aesara.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = tt.nnet.sigmoid(theano.dot(hsample, W.T) + bvis) + vmean = tt.nnet.sigmoid(aesara.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, - dtype=theano.config.floatX) + dtype=aesara.config.floatX) # The new scan, adding strict=True to the original call, and passing # explicitly W, bvis and bhid. - values, updates = theano.scan(OneStep, + values, updates = aesara.scan(OneStep, outputs_info=sample, non_sequences=[W, bvis, bhid], n_steps=10, @@ -464,18 +464,18 @@ construct a function that computes one iteration step : .. testsetup:: scan3 - import theano - from theano import tensor as tt + import aesara + from aesara import tensor as tt .. testcode:: scan3 def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out): - x_t = tt.tanh(theano.dot(x_tm1, W) + \ - theano.dot(u_t, W_in_1) + \ - theano.dot(u_tm4, W_in_2) + \ - theano.dot(y_tm1, W_feedback)) - y_t = theano.dot(x_tm3, W_out) + x_t = tt.tanh(aesara.dot(x_tm1, W) + \ + aesara.dot(u_t, W_in_1) + \ + aesara.dot(u_tm4, W_in_2) + \ + aesara.dot(y_tm1, W_feedback)) + y_t = aesara.dot(x_tm3, W_out) return [x_t, y_t] @@ -488,7 +488,7 @@ for the variables representing the different time taps to be in the same order as the one in which these taps are given. Also, not only taps should respect an order, but also variables, since this is how scan figures out what should be represented by what. Given that we have all -the Theano variables needed we construct our RNN as follows : +the Aesara variables needed we construct our RNN as follows : .. testcode:: scan3 @@ -505,7 +505,7 @@ the Theano variables needed we construct our RNN as follows : # y[-1] - ([x_vals, y_vals], updates) = theano.scan(fn=oneStep, + ([x_vals, y_vals], updates) = aesara.scan(fn=oneStep, sequences=dict(input=u, taps=[-4,-0]), outputs_info=[dict(initial=x0, taps=[-3,-1]), y0], non_sequences=[W, W_in_1, W_in_2, W_feedback, W_out], @@ -539,15 +539,15 @@ value ``max_value``. .. testcode:: def power_of_2(previous_power, max_value): - return previous_power*2, theano.scan.utils.until(previous_power*2 > max_value) + return previous_power*2, aesara.scan.utils.until(previous_power*2 > max_value) max_value = tt.scalar() - values, _ = theano.scan(power_of_2, + values, _ = aesara.scan(power_of_2, outputs_info = tt.constant(1.), non_sequences = max_value, n_steps = 1024) - f = theano.function([max_value], values) + f = aesara.function([max_value], values) print(f(45)) @@ -557,7 +557,7 @@ value ``max_value``. As you can see, in order to terminate on condition, the only thing required is that the inner function ``power_of_2`` to return also the condition -wrapped in the class ``theano.scan.utils.until``. The condition has to be +wrapped in the class ``aesara.scan.utils.until``. The condition has to be expressed in terms of the arguments of the inner function (in this case ``previous_power`` and ``max_value``). @@ -587,7 +587,7 @@ Before going more into the details, here are its current limitations: other words, ``taps`` can not be used in ``sequences`` and ``outputs_info``. 
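Returning to the taps mechanism illustrated by the RNN example earlier in this section, the sketch below (a hypothetical example, not taken from the original documentation) shows ``outputs_info`` taps in their simplest form: a Fibonacci-style recurrence in which each step receives the two previous outputs:

    import numpy as np
    import aesara
    import aesara.tensor as tt

    f_init = tt.vector("f_init")    # the two seed values of the sequence
    n_steps = tt.iscalar("n_steps")

    # taps=[-2, -1] makes scan pass the outputs from two steps back and one
    # step back to the inner function, in that order.
    fib, _ = aesara.scan(
        fn=lambda f_tm2, f_tm1: f_tm2 + f_tm1,
        outputs_info=[dict(initial=f_init, taps=[-2, -1])],
        n_steps=n_steps,
    )
    fibonacci = aesara.function([f_init, n_steps], fib)
    print(fibonacci(np.asarray([0.0, 1.0], dtype=aesara.config.floatX), 8))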
Often, in order to be able to compute the gradients through scan operations, -Theano needs to keep in memory some intermediate computations of scan. This +Aesara needs to keep in memory some intermediate computations of scan. This can sometimes use a prohibitively large amount of memory. ``scan_checkpoints`` allows to discard some of those intermediate steps and recompute them again when computing the gradients. Its ``save_every_N`` argument @@ -602,7 +602,7 @@ is similar to the classic ``scan`` function. Optimizing Scan's performance ----------------------------- -This section covers some ways to improve performance of a Theano function +This section covers some ways to improve performance of an Aesara function using Scan. @@ -638,9 +638,9 @@ improve performance at the cost of increased memory usage. By default, Scan reuses memory between iterations of the same execution but frees the memory after the last iteration. -There are two ways to achieve this, using the Theano flag +There are two ways to achieve this, using the Aesara flag ``config.scan__allow_gc`` and setting it to False, or using the argument -``allow_gc`` of the function theano.scan() and set it to False (when a value +``allow_gc`` of the function aesara.scan() and set it to False (when a value is not provided for this argument, the value of the flag ``config.scan__allow_gc`` is used). @@ -648,19 +648,19 @@ is not provided for this argument, the value of the flag Graph optimizations ^^^^^^^^^^^^^^^^^^^ -This one is simple but still worth pointing out. Theano is able to +This one is simple but still worth pointing out. Aesara is able to automatically recognize and optimize many computation patterns. However, there -are patterns that Theano doesn't optimize because doing so would change the +are patterns that Aesara doesn't optimize because doing so would change the user interface (such as merging shared variables together into a single one, -for instance). Additionally, Theano doesn't catch every case that it could +for instance). Additionally, Aesara doesn't catch every case that it could optimize and so it remains useful for performance that the user defines an efficient graph in the first place. This is also the case, and sometimes even more so, for the graph inside of Scan. This is because it will be executed -many times for every execution of the Theano function that contains it. +many times for every execution of the Aesara function that contains it. The `LSTM tutorial `_ on `DeepLearning.net `_ provides an example of an -optimization that Theano cannot perform. Instead of performing many matrix +optimization that Aesara cannot perform. Instead of performing many matrix multiplications between matrix :math:`x_t` and each of the shared matrices :math:`W_i`, :math:`W_c`, :math:`W_f` and :math:`W_o`, the matrices :math:`W_*`, are merged into a single shared matrix :math:`W` and the graph @@ -675,11 +675,11 @@ higher memory usage. reference ========= -.. automodule:: theano.scan +.. automodule:: aesara.scan -.. autofunction:: theano.map -.. autofunction:: theano.reduce -.. autofunction:: theano.foldl -.. autofunction:: theano.foldr -.. autofunction:: theano.scan -.. autofunction:: theano.scan_checkpoints +.. autofunction:: aesara.map +.. autofunction:: aesara.reduce +.. autofunction:: aesara.foldl +.. autofunction:: aesara.foldr +.. autofunction:: aesara.scan +.. 
autofunction:: aesara.scan_checkpoints diff --git a/doc/library/sparse/index.txt b/doc/library/sparse/index.txt index 8b88f1047d..4dd96c3840 100644 --- a/doc/library/sparse/index.txt +++ b/doc/library/sparse/index.txt @@ -8,13 +8,13 @@ In the tutorial section, you can find a :ref:`sparse tutorial `. -The sparse submodule is not loaded when we import Theano. You must -import ``theano.sparse`` to enable it. +The sparse submodule is not loaded when we import Aesara. You must +import ``aesara.sparse`` to enable it. The sparse module provides the same functionality as the tensor module. The difference lies under the covers because sparse matrices do not store data in a contiguous array. Note that there are no GPU -implementations for sparse matrices in Theano. The sparse module has +implementations for sparse matrices in Aesara. The sparse module has been used in: - NLP: Dense linear transformations of sparse vectors. @@ -24,14 +24,11 @@ Compressed Sparse Format ======================== This section tries to explain how information is stored for the two -sparse formats of SciPy supported by Theano. There are more formats -that can be used with SciPy and some documentation about them may be -found `here -`_. +sparse formats of SciPy supported by Aesara. .. Changes to this section should also result in changes to tutorial/sparse.txt. -Theano supports two *compressed sparse formats*: ``csc`` and ``csr``, +Aesara supports two *compressed sparse formats*: ``csc`` and ``csr``, respectively based on columns and rows. They have both the same attributes: ``data``, ``indices``, ``indptr`` and ``shape``. @@ -123,57 +120,57 @@ List of Implemented Operations ============================== - Moving from and to sparse - - :func:`dense_from_sparse `. + - :func:`dense_from_sparse `. Both grads are implemented. Structured by default. - - :func:`csr_from_dense `, - :func:`csc_from_dense `. + - :func:`csr_from_dense `, + :func:`csc_from_dense `. The grad implemented is structured. - - Theano SparseVariable objects have a method ``toarray()`` that is the same as - :func:`dense_from_sparse `. + - Aesara SparseVariable objects have a method ``toarray()`` that is the same as + :func:`dense_from_sparse `. - Construction of Sparses and their Properties - - :class:`CSM ` and ``CSC``, ``CSR`` to construct a matrix. + - :class:`CSM ` and ``CSC``, ``CSR`` to construct a matrix. The grad implemented is regular. - - :func:`csm_properties `. + - :func:`csm_properties `. to get the properties of a sparse matrix. The grad implemented is regular. - csm_indices(x), csm_indptr(x), csm_data(x) and csm_shape(x) or x.shape. - - :func:`sp_ones_like `. + - :func:`sp_ones_like `. The grad implemented is regular. - - :func:`sp_zeros_like `. + - :func:`sp_zeros_like `. The grad implemented is regular. - - :func:`square_diagonal `. + - :func:`square_diagonal `. The grad implemented is regular. - - :func:`construct_sparse_from_list `. + - :func:`construct_sparse_from_list `. The grad implemented is regular. - Cast - - :func:`cast ` with ``bcast``, ``wcast``, ``icast``, ``lcast``, + - :func:`cast ` with ``bcast``, ``wcast``, ``icast``, ``lcast``, ``fcast``, ``dcast``, ``ccast``, and ``zcast``. The grad implemented is regular. - Transpose - - :func:`transpose `. + - :func:`transpose `. The grad implemented is regular. - Basic Arithmetic - - :func:`neg `. + - :func:`neg `. The grad implemented is regular. - - :func:`eq `. - - :func:`neq `. - - :func:`gt `. - - :func:`ge `. - - :func:`lt `. - - :func:`le `. - - :func:`add `. + - :func:`eq `. 
+ - :func:`neq `. + - :func:`gt `. + - :func:`ge `. + - :func:`lt `. + - :func:`le `. + - :func:`add `. The grad implemented is regular. - - :func:`sub `. + - :func:`sub `. The grad implemented is regular. - - :func:`mul `. + - :func:`mul `. The grad implemented is regular. - - :func:`col_scale ` to multiply by a vector along the columns. + - :func:`col_scale ` to multiply by a vector along the columns. The grad implemented is structured. - - :func:`row_scale ` to multiply by a vector along the rows. + - :func:`row_scale ` to multiply by a vector along the rows. The grad implemented is structured. - Monoid (Element-wise operation with only one sparse input). @@ -207,13 +204,13 @@ List of Implemented Operations - ``sqrt`` - Dot Product - - :func:`dot `. + - :func:`dot `. - One of the inputs must be sparse, the other sparse or dense. - The grad implemented is regular. - No C code for perform and no C code for grad. - Returns a dense for perform and a dense for grad. - - :func:`structured_dot `. + - :func:`structured_dot `. - The first input is sparse, the second can be sparse or dense. - The grad implemented is structured. @@ -222,7 +219,7 @@ List of Implemented Operations dense one if one of the inputs is dense. - Returns a sparse grad for sparse inputs and dense grad for dense inputs. - - :func:`true_dot `. + - :func:`true_dot `. - The first input is sparse, the second can be sparse or dense. - The grad implemented is regular. @@ -232,18 +229,18 @@ List of Implemented Operations default a dense for dense inputs. The parameter ``grad_preserves_dense`` can be set to False to return a sparse grad for dense inputs. - - :func:`sampling_dot `. + - :func:`sampling_dot `. - Both inputs must be dense. - The grad implemented is structured for `p`. - Sample of the dot and sample of the gradient. - C code for perform but not for grad. - Returns sparse for perform and grad. - - :func:`usmm `. + - :func:`usmm `. - You *shouldn't* insert this op yourself! - There is an optimization that transform a - :func:`dot ` to ``Usmm`` when possible. + :func:`dot ` to ``Usmm`` when possible. - This op is the equivalent of gemm for sparse dot. - There is no grad implemented for this op. @@ -258,29 +255,29 @@ List of Implemented Operations - Sparse variables don't support [M, N:O] and [M:N, O] as we don't support sparse vectors and returning a sparse matrix would break the numpy interface. Use [M:M+1, N:O] and [M:N, O:O+1] instead. - - :func:`diag `. + - :func:`diag `. The grad implemented is regular. - Concatenation - - :func:`hstack `. + - :func:`hstack `. The grad implemented is regular. - - :func:`vstack `. + - :func:`vstack `. The grad implemented is regular. - Probability `There is no grad implemented for these operations.` - - :class:`Poisson ` and ``poisson`` - - :class:`Binomial ` and ``csc_fbinomial``, ``csc_dbinomial`` + - :class:`Poisson ` and ``poisson`` + - :class:`Binomial ` and ``csc_fbinomial``, ``csc_dbinomial`` ``csr_fbinomial``, ``csr_dbinomial`` - - :class:`Multinomial ` and ``multinomial`` + - :class:`Multinomial ` and ``multinomial`` - Internal Representation `They all have a regular grad implemented.` - - :func:`ensure_sorted_indices `. - - :func:`remove0 `. - - :func:`clean ` to resort indices and remove zeros + - :func:`ensure_sorted_indices `. + - :func:`remove0 `. + - :func:`clean ` to resort indices and remove zeros - To help testing - :func:`tests.sparse.test_basic.sparse_random_inputs` @@ -294,7 +291,7 @@ List of Implemented Operations :synopsis: Sparse Op .. moduleauthor:: LISA -.. 
automodule:: theano.sparse.basic +.. automodule:: aesara.sparse.basic :members: -.. autofunction:: theano.sparse.sparse_grad +.. autofunction:: aesara.sparse.sparse_grad diff --git a/doc/library/sparse/sandbox.txt b/doc/library/sparse/sandbox.txt index 1ba937f05d..5de4bf1064 100644 --- a/doc/library/sparse/sandbox.txt +++ b/doc/library/sparse/sandbox.txt @@ -1,5 +1,5 @@ -.. ../../../../theano/sparse/sandbox/sp.py -.. ../../../../theano/sparse/sandbox/truedot.py +.. ../../../../aesara/sparse/sandbox/sp.py +.. ../../../../aesara/sparse/sandbox/truedot.py .. _libdoc_sparse_sandbox: @@ -15,9 +15,9 @@ API === -.. automodule:: theano.sparse.sandbox.sp +.. automodule:: aesara.sparse.sandbox.sp :members: -.. automodule:: theano.sparse.sandbox.sp2 +.. automodule:: aesara.sparse.sandbox.sp2 :members: -.. automodule:: theano.sparse.sandbox.truedot +.. automodule:: aesara.sparse.sandbox.truedot :members: diff --git a/doc/library/tensor/basic.txt b/doc/library/tensor/basic.txt index 1bb9fb2919..9411c33a1c 100644 --- a/doc/library/tensor/basic.txt +++ b/doc/library/tensor/basic.txt @@ -1,4 +1,4 @@ -.. currentmodule:: theano.tensor +.. currentmodule:: aesara.tensor .. _libdoc_basic_tensor: @@ -8,27 +8,27 @@ Basic Tensor Functionality .. testsetup:: - import theano - import theano.tensor as tt - from theano.tensor.type import scalar, iscalar, TensorType, dmatrix, ivector, fmatrix - from theano.tensor import set_subtensor, inc_subtensor, batched_dot - from theano import shared + import aesara + import aesara.tensor as tt + from aesara.tensor.type import scalar, iscalar, TensorType, dmatrix, ivector, fmatrix + from aesara.tensor import set_subtensor, inc_subtensor, batched_dot + from aesara import shared import numpy import numpy as np -Theano supports any kind of Python object, but its focus is support for +Aesara supports any kind of Python object, but its focus is support for symbolic matrix expressions. When you type, >>> x = tt.fmatrix() the ``x`` is a :class:`TensorVariable` instance. The ``tt.fmatrix`` object itself is an instance of :class:`TensorType`. -Theano knows what type of variable ``x`` is because ``x.type`` +Aesara knows what type of variable ``x`` is because ``x.type`` points back to ``tt.fmatrix``. This chapter explains the various ways of creating tensor variables, the attributes and methods of :class:`TensorVariable` and :class:`TensorType`, -and various basic symbolic math and arithmetic that Theano supports for +and various basic symbolic math and arithmetic that Aesara supports for tensor variables. .. _libdoc_tensor_creation: @@ -36,7 +36,7 @@ tensor variables. Creation ======== -Theano provides a list of predefined tensor types that can be used +Aesara provides a list of predefined tensor types that can be used to create a tensor variables. Variables can be named to facilitate debugging, and all of these constructors accept an optional ``name`` argument. For example, the following each produce a TensorVariable instance that stands @@ -109,12 +109,12 @@ floating-point precision. All Fully-Typed Constructors ---------------------------- -The following TensorType instances are provided in the theano.tensor module. +The following TensorType instances are provided in the aesara.tensor module. They are all callable, and accept an optional ``name`` argument. So for example: .. 
testcode:: constructors - from theano.tensor import * + from aesara.tensor import * x = dmatrix() # creates one Variable with no name x = dmatrix('x') # creates one Variable with name 'x' @@ -122,7 +122,7 @@ They are all callable, and accept an optional ``name`` argument. So for example .. #COMMENT table generated by - $ python Theano/doc/generate_dtype_tensor_table.py + $ python Aesara/doc/generate_dtype_tensor_table.py ============ =========== ==== ================ =================================== Constructor dtype ndim shape broadcastable @@ -243,7 +243,7 @@ name. For example: .. testcode:: constructors - from theano.tensor import * + from aesara.tensor import * x, y, z = dmatrices(3) # creates three matrix Variables with no names x, y, z = dmatrices('x', 'y', 'z') # creates three matrix Variables named 'x', 'y' and 'z' @@ -296,14 +296,14 @@ For additional information, see the :func:`shared() ` documentati Finally, when you use a numpy ndarray or a Python number together with :class:`TensorVariable` instances in arithmetic expressions, the result is a :class:`TensorVariable`. What happens to the ndarray or the number? -Theano requires that the inputs to all expressions be Variable instances, so -Theano automatically wraps them in a :class:`TensorConstant`. +Aesara requires that the inputs to all expressions be Variable instances, so +Aesara automatically wraps them in a :class:`TensorConstant`. .. note:: - Theano makes a copy of any ndarray that you use in an expression, so + Aesara makes a copy of any ndarray that you use in an expression, so subsequent - changes to that ndarray will not have any effect on the Theano expression. + changes to that ndarray will not have any effect on the Aesara expression. For numpy ndarrays the dtype is given, but the broadcastable pattern must be inferred. The TensorConstant is given a type with a matching dtype, @@ -321,7 +321,7 @@ them perfectly, but a dscalar otherwise. are stored instead as single-precision floats. For fine control of this rounding policy, see - theano.tensor.basic.autocast_float. + aesara.tensor.basic.autocast_float. .. function:: as_tensor_variable(x, name=None, ndim=None) @@ -536,7 +536,7 @@ TensorVariable .. note:: - In numpy and in Theano, the transpose of a vector is exactly the + In numpy and in Aesara, the transpose of a vector is exactly the same vector! Use `reshape` or `dimshuffle` to turn your vector into a row or column matrix. @@ -560,7 +560,7 @@ TensorVariable .. method:: get_scalar_constant_value() .. method:: zeros_like(model, dtype=None) - All the above methods are equivalent to NumPy for Theano on the current tensor. + All the above methods are equivalent to NumPy for Aesara on the current tensor. .. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__ @@ -637,14 +637,14 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`. Example: - >>> tensor = theano.tensor.type.tensor3() - >>> theano.tensor.shape_padaxis(tensor, axis=0) + >>> tensor = aesara.tensor.type.tensor3() + >>> aesara.tensor.shape_padaxis(tensor, axis=0) InplaceDimShuffle{x,0,1,2}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=1) + >>> aesara.tensor.shape_padaxis(tensor, axis=1) InplaceDimShuffle{0,x,1,2}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=3) + >>> aesara.tensor.shape_padaxis(tensor, axis=3) InplaceDimShuffle{0,1,2,x}.0 - >>> theano.tensor.shape_padaxis(tensor, axis=-1) + >>> aesara.tensor.shape_padaxis(tensor, axis=-1) InplaceDimShuffle{0,1,2,x}.0 .. 
autofunction:: unbroadcast(x, *axes) @@ -687,8 +687,8 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`. `_ documentation for examples. - :see: :func:`theano.tensor.extra_ops.repeat - ` + :see: :func:`aesara.tensor.extra_ops.repeat + ` :note: Currently, `reps` must be a constant, `x.ndim` and `len(reps)` must be equal and, if specified, `ndim` must be @@ -736,7 +736,7 @@ Creating Tensor .. function:: fill(a,b) :param a: tensor that has same shape as output - :param b: theano scalar or value with which you want to fill the output + :param b: aesara scalar or value with which you want to fill the output Create a matrix by filling the shape of `a` with `b` @@ -746,13 +746,13 @@ Creating Tensor :param shape: the dimensions of the returned array :returns: an N-dimensional tensor initialized by `value` and having the specified shape. -.. function:: eye(n, m=None, k=0, dtype=theano.config.floatX) +.. function:: eye(n, m=None, k=0, dtype=aesara.config.floatX) - :param n: number of rows in output (value or theano scalar) - :param m: number of columns in output (value or theano scalar) + :param n: number of rows in output (value or aesara scalar) + :param m: number of columns in output (value or aesara scalar) :param k: Index of the diagonal: 0 refers to the main diagonal, a positive value refers to an upper diagonal, and a - negative value to a lower diagonal. It can be a theano + negative value to a lower diagonal. It can be an Aesara scalar. :returns: An array where all elements are equal to zero, except for the `k`-th diagonal, whose values are equal to one. @@ -778,16 +778,16 @@ Creating Tensor Examples: - >>> a = theano.tensor.type.scalar() - >>> b = theano.tensor.type.scalar() - >>> c = theano.tensor.type.scalar() - >>> x = theano.tensor.stack([a, b, c]) + >>> a = aesara.tensor.type.scalar() + >>> b = aesara.tensor.type.scalar() + >>> c = aesara.tensor.type.scalar() + >>> x = aesara.tensor.stack([a, b, c]) >>> x.ndim # x is a vector of length 3. 1 - >>> a = theano.tensor.type.tensor4() - >>> b = theano.tensor.type.tensor4() - >>> c = theano.tensor.type.tensor4() - >>> x = theano.tensor.stack([a, b, c]) + >>> a = aesara.tensor.type.tensor4() + >>> b = aesara.tensor.type.tensor4() + >>> c = aesara.tensor.type.tensor4() + >>> x = aesara.tensor.stack([a, b, c]) >>> x.ndim # x is a 5d tensor. 5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) @@ -796,13 +796,13 @@ Creating Tensor We can also specify different axis than default value 0 - >>> x = theano.tensor.stack([a, b, c], axis=3) + >>> x = aesara.tensor.stack([a, b, c], axis=3) >>> x.ndim 5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) >>> rval.shape # 3 tensors are stacked on axis 3 (2, 2, 2, 3, 2) - >>> x = theano.tensor.stack([a, b, c], axis=-2) + >>> x = aesara.tensor.stack([a, b, c], axis=-2) >>> x.ndim 5 >>> rval = x.eval(dict((t, np.zeros((2, 2, 2, 2))) for t in [a, b, c])) @@ -858,8 +858,8 @@ Creating Tensor This function can create a tensor from a shaped list of scalars: - >>> from theano.tensor import stacklists, scalars, matrices - >>> from theano import function + >>> from aesara.tensor import stacklists, scalars, matrices + >>> from aesara import function >>> a, b, c, d = scalars('abcd') >>> X = stacklists([[a, b], [c, d]]) >>> f = function([a, b, c, d], X) @@ -878,7 +878,7 @@ Creating Tensor >>> f(x, x, x, x).shape (2, 2, 4, 4) -.. autofunction:: theano.tensor.basic.choose +.. 
autofunction:: aesara.tensor.basic.choose Reductions ========== @@ -907,8 +907,7 @@ Reductions will broadcast correctly against the original tensor. :Returns: the index of the maximum value along a given axis - if axis=None, Theano 0.5rc1 or later: argmax over the flattened tensor (like numpy) - older: then axis is assumed to be ndim(x)-1 + if axis=None, argmax over the flattened tensor (like numpy) .. function:: max_and_argmax(x, axis=None, keepdims=False) @@ -919,8 +918,7 @@ Reductions will broadcast correctly against the original tensor. :Returns: the maximum value along a given axis and its index. - if axis=None, Theano 0.5rc1 or later: max_and_argmax over the flattened tensor (like numpy) - older: then axis is assumed to be ndim(x)-1 + if axis=None, max_and_argmax over the flattened tensor (like numpy) .. function:: min(x, axis=None, keepdims=False) @@ -945,8 +943,7 @@ Reductions will broadcast correctly against the original tensor. :Returns: the index of the minimum value along a given axis - if axis=None, Theano 0.5rc1 or later: argmin over the flattened tensor (like numpy) - older: then axis is assumed to be ndim(x)-1 + if axis=None, argmin over the flattened tensor (like numpy) .. function:: sum(x, axis=None, dtype=None, keepdims=False, acc_dtype=None) @@ -1129,21 +1126,19 @@ Reductions Indexing ======== -Like NumPy, Theano distinguishes between *basic* and *advanced* indexing. -Theano fully supports basic indexing +Like NumPy, Aesara distinguishes between *basic* and *advanced* indexing. +Aesara fully supports basic indexing (see `NumPy's indexing `_) and `integer advanced indexing `_. -Since version 0.10.0 Theano also supports boolean indexing with boolean -NumPy arrays or Theano tensors. Index-assignment is *not* supported. If you want to do something like ``a[5] -= b`` or ``a[5]+=b``, see :func:`theano.tensor.subtensor.set_subtensor` and -:func:`theano.tensor.subtensor.inc_subtensor` below. += b`` or ``a[5]+=b``, see :func:`aesara.tensor.subtensor.set_subtensor` and +:func:`aesara.tensor.subtensor.inc_subtensor` below. -.. autofunction:: theano.tensor.subtensor.set_subtensor +.. autofunction:: aesara.tensor.subtensor.set_subtensor -.. autofunction:: theano.tensor.subtensor.inc_subtensor +.. autofunction:: aesara.tensor.subtensor.inc_subtensor .. _tensor_operator_support: @@ -1182,11 +1177,11 @@ Bitwise Inplace ------------- -In-place operators are *not* supported. Theano's graph-optimizations +In-place operators are *not* supported. Aesara's graph-optimizations will determine which intermediate values to use for in-place computations. If you would like to update the value of a :term:`shared variable`, consider using the ``updates`` argument to -:func:`theano.function`. +:func:`aesara.function`. .. _libdoc_tensor_elementwise: @@ -1206,7 +1201,7 @@ Casting .. testcode:: cast - import theano.tensor as tt + import aesara.tensor as tt x = tt.matrix() x_as_int = tt.cast(x, 'int32') @@ -1235,14 +1230,14 @@ The six usual equality and inequality operators share the same interface. .. note:: - Theano has no boolean dtype. Instead, all boolean tensors are represented + Aesara has no boolean dtype. Instead, all boolean tensors are represented in ``'int8'``. Here is an example with the less-than operator. .. testcode:: oper - import theano.tensor as tt + import aesara.tensor as tt x,y = tt.dmatrices('x','y') z = tt.le(x,y) @@ -1326,7 +1321,7 @@ Condition .. 
function:: switch(cond, ift, iff) Returns a variable representing a switch between ift (iftrue) and iff (iffalse) - based on the condition cond. This is the theano equivalent of numpy.where. + based on the condition cond. This is the Aesara equivalent of numpy.where. :Parameter: *cond* - symbolic Tensor (or compatible) :Parameter: *ift* - symbolic Tensor (or compatible) @@ -1335,7 +1330,7 @@ Condition .. testcode:: switch - import theano.tensor as tt + import aesara.tensor as tt a,b = tt.dmatrices('a','b') x,y = tt.dmatrices('x','y') z = tt.switch(tt.lt(a,b), x, y) @@ -1406,7 +1401,7 @@ Here is an example using the bit-wise ``and_`` via the ``&`` operator: .. testcode:: bitwise - import theano.tensor as tt + import aesara.tensor as tt x,y = tt.imatrices('x','y') z = x & y @@ -1510,7 +1505,7 @@ Mathematical Returns a variable representing the survival function (1-cdf — sometimes more accurate). - C code is provided in the Theano_lgpl repository. + C code is provided in the Aesara_lgpl repository. This makes it faster. https://github.com/Theano/Theano_lgpl.git @@ -1550,7 +1545,7 @@ Linear Algebra .. function:: tensordot(a, b, axes=2) Given two tensors a and b,tensordot computes a generalized dot product over - the provided axes. Theano's implementation reduces all expressions to + the provided axes. Aesara's implementation reduces all expressions to matrix or vector dot products and is based on code from Tijmen Tieleman's gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html). @@ -1591,7 +1586,7 @@ Linear Algebra :rtype: symbolic tensor It may be helpful to consider an example to see what tensordot does. - Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4) + Aesara's implementation is identical to NumPy's. Here a has shape (2, 3, 4) and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] -- note that a.shape[1] == b.shape[3] and a.shape[2] == b.shape[2]; these axes are compatible. The resulting tensor will have shape (2, 5, 6) -- the @@ -1754,7 +1749,7 @@ Linear Algebra Gradient / Differentiation ========================== -.. automodule:: theano.gradient +.. automodule:: aesara.gradient :members: grad :noindex: diff --git a/doc/library/tensor/basic_opt.txt b/doc/library/tensor/basic_opt.txt index 3b3fd427b8..dc52517012 100644 --- a/doc/library/tensor/basic_opt.txt +++ b/doc/library/tensor/basic_opt.txt @@ -7,5 +7,5 @@ :synopsis: Tensor Optimizations .. moduleauthor:: LISA, PyMC Developers -.. automodule:: theano.tensor.basic_opt +.. automodule:: aesara.tensor.basic_opt :members: diff --git a/doc/library/tensor/elemwise.txt b/doc/library/tensor/elemwise.txt index 9053034e1c..f97ae46030 100644 --- a/doc/library/tensor/elemwise.txt +++ b/doc/library/tensor/elemwise.txt @@ -4,12 +4,12 @@ .. testsetup:: - from theano.tensor.elemwise import * + from aesara.tensor.elemwise import * .. module:: tensor.elemwise :platform: Unix, Windows :synopsis: Tensor Elemwise .. moduleauthor:: LISA -.. automodule:: theano.tensor.elemwise +.. automodule:: aesara.tensor.elemwise :members: diff --git a/doc/library/tensor/extra_ops.txt b/doc/library/tensor/extra_ops.txt index 89d56d943d..de74d74006 100644 --- a/doc/library/tensor/extra_ops.txt +++ b/doc/library/tensor/extra_ops.txt @@ -4,13 +4,12 @@ .. testsetup:: * - from theano.tensor.extra_ops import * + from aesara.tensor.extra_ops import * .. module:: tensor.extra_ops :platform: Unix, Windows :synopsis: Tensor Extra Ops .. moduleauthor:: LISA -.. automodule:: theano.tensor.extra_ops +.. 
automodule:: aesara.tensor.extra_ops :members: - diff --git a/doc/library/tensor/fft.txt b/doc/library/tensor/fft.txt index 91daab79c1..39d4cb374b 100644 --- a/doc/library/tensor/fft.txt +++ b/doc/library/tensor/fft.txt @@ -10,10 +10,10 @@ FFT gradients are implemented as the opposite Fourier transform of the output gr .. warning :: The real and imaginary parts of the Fourier domain arrays are stored as a pair of float - arrays, emulating complex. Since theano has limited support for complex + arrays, emulating complex. Since aesara has limited support for complex number operations, care must be taken to manually implement operations such as gradients. -.. automodule:: theano.tensor.fft +.. automodule:: aesara.tensor.fft :members: rfft, irfft For example, the code below performs the real input FFT of a box function, @@ -23,14 +23,14 @@ oscillates due to the box function being shifted to the middle of the array. .. testcode:: import numpy as np - import theano - import theano.tensor as tt - from theano.tensor import fft + import aesara + import aesara.tensor as tt + from aesara.tensor import fft x = tt.matrix('x', dtype='float64') rfft = fft.rfft(x, norm='ortho') - f_rfft = theano.function([x], rfft) + f_rfft = aesara.function([x], rfft) N = 1024 box = np.zeros((1, N), dtype='float64') diff --git a/doc/library/tensor/io.txt b/doc/library/tensor/io.txt index 6565b12479..ea27452c53 100644 --- a/doc/library/tensor/io.txt +++ b/doc/library/tensor/io.txt @@ -10,16 +10,15 @@ File operation ============== -- Load from disk with the function :func:`load ` and its associated op :class:`LoadFromDisk ` +- Load from disk with the function :func:`load ` and its associated op :class:`LoadFromDisk ` MPI operation ============= -- Non-blocking transfer: :func:`isend ` and :func:`irecv `. -- Blocking transfer: :func:`send ` and :func:`recv ` +- Non-blocking transfer: :func:`isend ` and :func:`irecv `. +- Blocking transfer: :func:`send ` and :func:`recv ` Details ======= -.. automodule:: theano.tensor.io +.. automodule:: aesara.tensor.io :members: - diff --git a/doc/library/tensor/math_opt.txt b/doc/library/tensor/math_opt.txt index acb0118b4d..5022e94f48 100644 --- a/doc/library/tensor/math_opt.txt +++ b/doc/library/tensor/math_opt.txt @@ -7,5 +7,5 @@ :synopsis: Tensor Optimizations for Math Operations .. moduleauthor:: LISA, PyMC Developers -.. automodule:: theano.tensor.math_opt +.. automodule:: aesara.tensor.math_opt :members: diff --git a/doc/library/tensor/nlinalg.txt b/doc/library/tensor/nlinalg.txt index 7afb847474..a8bb54cf58 100644 --- a/doc/library/tensor/nlinalg.txt +++ b/doc/library/tensor/nlinalg.txt @@ -1,4 +1,4 @@ -.. ../../../../theano/sandbox/nlinalg.py +.. ../../../../aesara/sandbox/nlinalg.py .. _libdoc_linalg: @@ -18,5 +18,5 @@ API === -.. automodule:: theano.tensor.nlinalg +.. automodule:: aesara.tensor.nlinalg :members: diff --git a/doc/library/tensor/nnet/basic.txt b/doc/library/tensor/nnet/basic.txt index 13eb41d431..501cda1727 100644 --- a/doc/library/tensor/nnet/basic.txt +++ b/doc/library/tensor/nnet/basic.txt @@ -4,7 +4,7 @@ :mod:`basic` -- Basic Ops for neural networks ====================================================== -.. module:: theano.tensor.nnet.basic +.. module:: aesara.tensor.nnet.basic :platform: Unix, Windows :synopsis: Ops for neural networks .. 
moduleauthor:: LISA @@ -19,14 +19,14 @@ - :func:`softplus` - :func:`softmax` - :func:`softsign` - - :func:`relu() ` - - :func:`elu() ` - - :func:`selu() ` + - :func:`relu() ` + - :func:`elu() ` + - :func:`selu() ` - :func:`binary_crossentropy` - :func:`sigmoid_binary_crossentropy` - :func:`.categorical_crossentropy` - - :func:`h_softmax() ` - - :func:`confusion_matrix ` + - :func:`h_softmax() ` + - :func:`confusion_matrix ` .. function:: sigmoid(x) @@ -50,7 +50,7 @@ .. testcode:: - import theano.tensor as tt + import aesara.tensor as tt x, y, b = tt.dvectors('x', 'y', 'b') W = tt.dmatrix('W') @@ -66,8 +66,8 @@ :Return type: same as x :Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`. :note: To automatically change all :func:`sigmoid` ops to this version, use - the Theano optimization ``local_ultra_fast_sigmoid``. This can be done - with the Theano flag ``optimizer_including=local_ultra_fast_sigmoid``. + the Aesara optimization ``local_ultra_fast_sigmoid``. This can be done + with the Aesara flag ``optimizer_including=local_ultra_fast_sigmoid``. This optimization is done late, so it should not affect stabilization optimization. @@ -88,8 +88,8 @@ :Return type: same as x :Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`. :note: To automatically change all :func:`sigmoid` ops to this version, use - the Theano optimization ``local_hard_sigmoid``. This can be done - with the Theano flag ``optimizer_including=local_hard_sigmoid``. + the Aesara optimization ``local_hard_sigmoid``. This can be done + with the Aesara flag ``optimizer_including=local_hard_sigmoid``. This optimization is done late, so it should not affect stabilization optimization. @@ -147,11 +147,11 @@ W = tt.dmatrix('W') y = tt.nnet.softmax(tt.dot(W,x) + b) -.. autofunction:: theano.tensor.nnet.relu +.. autofunction:: aesara.tensor.nnet.relu -.. autofunction:: theano.tensor.nnet.elu +.. autofunction:: aesara.tensor.nnet.elu -.. autofunction:: theano.tensor.nnet.selu +.. autofunction:: aesara.tensor.nnet.selu .. function:: binary_crossentropy(output,target) @@ -239,8 +239,8 @@ .. testsetup:: - import theano - o = theano.tensor.ivector() + import aesara + o = aesara.tensor.ivector() .. testcode:: @@ -249,4 +249,4 @@ # o is either the above-mentioned 1-of-N vector or 2D tensor -.. autofunction:: theano.tensor.nnet.h_softmax +.. autofunction:: aesara.tensor.nnet.h_softmax diff --git a/doc/library/tensor/nnet/blocksparse.txt b/doc/library/tensor/nnet/blocksparse.txt index fb416f4564..b39c75f06c 100644 --- a/doc/library/tensor/nnet/blocksparse.txt +++ b/doc/library/tensor/nnet/blocksparse.txt @@ -10,5 +10,5 @@ .. moduleauthor:: LISA -.. automodule:: theano.tensor.nnet.blocksparse +.. automodule:: aesara.tensor.nnet.blocksparse :members: diff --git a/doc/library/tensor/nnet/bn.txt b/doc/library/tensor/nnet/bn.txt index b6c01efc29..572e71e36b 100644 --- a/doc/library/tensor/nnet/bn.txt +++ b/doc/library/tensor/nnet/bn.txt @@ -10,9 +10,9 @@ .. moduleauthor:: LISA -.. autofunction:: theano.tensor.nnet.bn.batch_normalization_train -.. autofunction:: theano.tensor.nnet.bn.batch_normalization_test +.. autofunction:: aesara.tensor.nnet.bn.batch_normalization_train +.. autofunction:: aesara.tensor.nnet.bn.batch_normalization_test -.. seealso:: cuDNN batch normalization: :class:`theano.gpuarray.dnn.dnn_batch_normalization_train`, :class:`theano.gpuarray.dnn.dnn_batch_normalization_test>`. +.. 
seealso:: cuDNN batch normalization: :class:`aesara.gpuarray.dnn.dnn_batch_normalization_train`, :class:`aesara.gpuarray.dnn.dnn_batch_normalization_test>`. -.. autofunction:: theano.tensor.nnet.bn.batch_normalization +.. autofunction:: aesara.tensor.nnet.bn.batch_normalization diff --git a/doc/library/tensor/nnet/conv.txt b/doc/library/tensor/nnet/conv.txt index ad22a5f32d..04b505c495 100644 --- a/doc/library/tensor/nnet/conv.txt +++ b/doc/library/tensor/nnet/conv.txt @@ -8,8 +8,8 @@ Two similar implementation exists for conv2d: - :func:`signal.conv2d ` and - :func:`nnet.conv2d `. + :func:`signal.conv2d ` and + :func:`nnet.conv2d `. The former implements a traditional 2D convolution, while the latter implements the convolutional layers @@ -24,10 +24,10 @@ The recommended user interface are: -- :func:`theano.tensor.nnet.conv2d` for 2d convolution -- :func:`theano.tensor.nnet.conv3d` for 3d convolution +- :func:`aesara.tensor.nnet.conv2d` for 2d convolution +- :func:`aesara.tensor.nnet.conv3d` for 3d convolution -With those new interface, Theano will automatically use the fastest +With those new interface, Aesara will automatically use the fastest implementation in many cases. On the CPU, the implementation is a GEMM based one. On the GPU, there is a GEMM based and :ref:`cuDNN ` version. @@ -35,9 +35,9 @@ based one. On the GPU, there is a GEMM based and :ref:`cuDNN By default on the GPU, if cuDNN is available, it will be used, otherwise we will fall back to using gemm based version (slower than cuDNN in most cases and uses more memory). To get an error if cuDNN -can not be used, you can supply the Theano flag ``dnn.enable=True``. +can not be used, you can supply the Aesara flag ``dnn.enable=True``. -Either cuDNN and the gemm version can be disabled using the Theano flags +Either cuDNN and the gemm version can be disabled using the Aesara flags ``optimizer_excluding=conv_dnn`` and ``optimizer_excluding=conv_gemm``, respectively. If both are disabled, it will raise an error. @@ -62,14 +62,14 @@ for each specific convolution in your graph. For each instance, it will compile and benchmark each applicable implementation and choose the fastest one. It can be enabled using ``optimizer_including=conv_meta``. The meta-optimizer can also selectively disable cudnn and gemm version -using the Theano flag ``metaopt__optimizer_excluding=conv_dnn`` and +using the Aesara flag ``metaopt__optimizer_excluding=conv_dnn`` and ``metaopt__optimizer_excluding=conv_gemm`` respectively. .. note:: - Theano had older user interface like - theano.tensor.nnet.conv.conv2d. Do not use them anymore. They + Aesara had older user interface like + aesara.tensor.nnet.conv.conv2d. Do not use them anymore. They will give you slower code and won't allow easy switch between CPU and GPU computation. They also support less type of convolution. @@ -78,14 +78,14 @@ Implementation Details ====================== This section gives more implementation detail. Most of the time you do -not need to read it. Theano will select it for you. +not need to read it. Aesara will select it for you. - Implemented operators for neural network 2D / image convolution: - - :func:`nnet.conv.conv2d `. + - :func:`nnet.conv.conv2d `. old 2d convolution. DO NOT USE ANYMORE. - - :func:`GpuCorrMM ` + - :func:`GpuCorrMM ` This is a GPU-only 2d correlation implementation taken from `caffe's CUDA implementation `_. It does not flip the kernel. @@ -96,41 +96,41 @@ not need to read it. Theano will select it for you. 
It needs extra memory for the Toeplitz matrix, which is a 2D matrix of shape ``(no of channels * filter width * filter height, output width * output height)``. - - :func:`CorrMM ` + - :func:`CorrMM ` This is a CPU-only 2d correlation implementation taken from `caffe's cpp implementation `_. It does not flip the kernel. - - :func:`dnn_conv ` GPU-only + - :func:`dnn_conv ` GPU-only convolution using NVIDIA's cuDNN library. - Implemented operators for neural network 3D / video convolution: - - :func:`GpuCorr3dMM ` + - :func:`GpuCorr3dMM ` This is a GPU-only 3d correlation relying on a Toeplitz matrix - and gemm implementation (see :func:`GpuCorrMM `) + and gemm implementation (see :func:`GpuCorrMM `) It needs extra memory for the Toeplitz matrix, which is a 2D matrix of shape ``(no of channels * filter width * filter height * filter depth, output width * output height * output depth)``. - - :func:`Corr3dMM ` + - :func:`Corr3dMM ` This is a CPU-only 3d correlation implementation based on - the 2d version (:func:`CorrMM `). + the 2d version (:func:`CorrMM `). It does not flip the kernel. As it provides a gradient, you can use it as a replacement for nnet.conv3d. For convolutions done on CPU, nnet.conv3d will be replaced by Corr3dMM. - - :func:`dnn_conv3d ` GPU-only - 3D convolution using NVIDIA's cuDNN library (as :func:`dnn_conv ` but for 3d). + - :func:`dnn_conv3d ` GPU-only + 3D convolution using NVIDIA's cuDNN library (as :func:`dnn_conv ` but for 3d). - If cuDNN is available, by default, Theano will replace all nnet.conv3d + If cuDNN is available, by default, Aesara will replace all nnet.conv3d operations with dnn_conv. - - :func:`conv3d2d ` + - :func:`conv3d2d ` Another conv3d implementation that uses the conv2d with data reshaping. It is faster in some corner cases than conv3d. It flips the kernel. -.. autofunction:: theano.tensor.nnet.conv2d -.. autofunction:: theano.tensor.nnet.conv2d_transpose -.. autofunction:: theano.tensor.nnet.conv3d -.. autofunction:: theano.tensor.nnet.conv3d2d.conv3d -.. autofunction:: theano.tensor.nnet.conv.conv2d +.. autofunction:: aesara.tensor.nnet.conv2d +.. autofunction:: aesara.tensor.nnet.conv2d_transpose +.. autofunction:: aesara.tensor.nnet.conv3d +.. autofunction:: aesara.tensor.nnet.conv3d2d.conv3d +.. autofunction:: aesara.tensor.nnet.conv.conv2d -.. automodule:: theano.tensor.nnet.abstract_conv +.. automodule:: aesara.tensor.nnet.abstract_conv :members: diff --git a/doc/library/tensor/nnet/ctc.txt b/doc/library/tensor/nnet/ctc.txt index fe3a372d39..8a78ee9f21 100644 --- a/doc/library/tensor/nnet/ctc.txt +++ b/doc/library/tensor/nnet/ctc.txt @@ -1,7 +1,7 @@ .. _libdoc_tensor_nnet_ctc: ================================================================================== -:mod:`theano.tensor.nnet.ctc` -- Connectionist Temporal Classification (CTC) loss +:mod:`aesara.tensor.nnet.ctc` -- Connectionist Temporal Classification (CTC) loss ================================================================================== .. note:: @@ -22,11 +22,10 @@ Unfortunately, Windows platforms are not yet supported by the underlying library. -.. module:: theano.tensor.nnet.ctc +.. module:: aesara.tensor.nnet.ctc :platform: Unix :synopsis: Connectionist temporal classification (CTC) loss Op, using the warp-ctc library .. moduleauthor:: `João Victor Risso `_ -.. autofunction:: theano.tensor.nnet.ctc.ctc -.. autoclass:: theano.tensor.nnet.ctc.ConnectionistTemporalClassification - +.. autofunction:: aesara.tensor.nnet.ctc.ctc +.. 
autoclass:: aesara.tensor.nnet.ctc.ConnectionistTemporalClassification diff --git a/doc/library/tensor/nnet/index.txt b/doc/library/tensor/nnet/index.txt index ec97976a91..93dce98ea6 100644 --- a/doc/library/tensor/nnet/index.txt +++ b/doc/library/tensor/nnet/index.txt @@ -4,12 +4,12 @@ :mod:`nnet` -- Ops related to neural networks ================================================== -.. module:: theano.tensor.nnet +.. module:: aesara.tensor.nnet :platform: Unix, Windows :synopsis: various ops relating to neural networks .. moduleauthor:: LISA -Theano was originally developed for machine learning applications, particularly +Aesara was originally developed for machine learning applications, particularly for the topic of deep learning. As such, our lab has developed many functions and ops which are particular to neural networks and deep learning. diff --git a/doc/library/tensor/nnet/neighbours.txt b/doc/library/tensor/nnet/neighbours.txt index c4d3753e54..afc8a992c0 100644 --- a/doc/library/tensor/nnet/neighbours.txt +++ b/doc/library/tensor/nnet/neighbours.txt @@ -4,7 +4,7 @@ :mod:`neighbours` -- Ops for working with images in convolutional nets ======================================================================= -.. module:: theano.tensor.nnet.neighbours +.. module:: aesara.tensor.nnet.neighbours :platform: Unix, Windows :synopsis: Ops for working with images in conv nets .. moduleauthor:: LISA @@ -13,9 +13,9 @@ Functions ========= -.. autofunction:: theano.tensor.nnet.neighbours.images2neibs +.. autofunction:: aesara.tensor.nnet.neighbours.images2neibs -.. autofunction:: theano.tensor.nnet.neighbours.neibs2images +.. autofunction:: aesara.tensor.nnet.neighbours.neibs2images See also diff --git a/doc/library/tensor/random/basic.txt b/doc/library/tensor/random/basic.txt index 623faf408b..96b01df112 100644 --- a/doc/library/tensor/random/basic.txt +++ b/doc/library/tensor/random/basic.txt @@ -5,12 +5,12 @@ :mod:`random` -- Low-level random numbers ============================================= -.. module:: theano.tensor.random +.. module:: aesara.tensor.random :synopsis: symbolic random variables .. moduleauthor:: pymc-team -The `theano.tensor.random` module provides random-number drawing functionality +The `aesara.tensor.random` module provides random-number drawing functionality that closely resembles the `numpy.random` module. Reference @@ -24,7 +24,7 @@ Reference .. testcode:: constructors - from theano.tensor.random.utils import RandomStream + from aesara.tensor.random.utils import RandomStream rng = RandomStream() sample = rng.normal(0, 1, size=(2, 2)) diff --git a/doc/library/tensor/random/utils.txt b/doc/library/tensor/random/utils.txt index f5c58ca0f7..a730c6a6ca 100644 --- a/doc/library/tensor/random/utils.txt +++ b/doc/library/tensor/random/utils.txt @@ -4,7 +4,7 @@ :mod:`utils` -- Friendly random numbers ====================================================== -.. module:: theano.tensor.random.utils +.. module:: aesara.tensor.random.utils :platform: Unix, Windows :synopsis: symbolic random variables .. moduleauthor:: LISA @@ -12,11 +12,11 @@ Guide ===== -Since Theano uses a functional design, producing pseudo-random numbers in a +Since Aesara uses a functional design, producing pseudo-random numbers in a graph is not quite as straightforward as it is in numpy. -The way to think about putting randomness into Theano's computations is to -put random variables in your graph. 
Theano will allocate a numpy RandomState +The way to think about putting randomness into Aesara's computations is to +put random variables in your graph. Aesara will allocate a numpy RandomState object for each such variable, and draw from it as necessary. We will call this sort of sequence of random numbers a *random stream*. diff --git a/doc/library/tensor/signal/conv.txt b/doc/library/tensor/signal/conv.txt index 9ec979805b..f0cca18ea7 100644 --- a/doc/library/tensor/signal/conv.txt +++ b/doc/library/tensor/signal/conv.txt @@ -8,8 +8,8 @@ Two similar implementation exists for conv2d: - :func:`signal.conv2d ` and - :func:`nnet.conv2d `. + :func:`signal.conv2d ` and + :func:`nnet.conv2d `. The former implements a traditional 2D convolution, while the latter implements the convolutional layers @@ -21,9 +21,8 @@ :synopsis: ops for performing convolutions .. moduleauthor:: LISA -.. autofunction:: theano.tensor.signal.conv.conv2d +.. autofunction:: aesara.tensor.signal.conv.conv2d .. function:: fft(*todo) [James has some code for this, but hasn't gotten it into the source tree yet.] - diff --git a/doc/library/tensor/signal/downsample.txt b/doc/library/tensor/signal/downsample.txt index 2a6a1fa5f4..84d579f65b 100644 --- a/doc/library/tensor/signal/downsample.txt +++ b/doc/library/tensor/signal/downsample.txt @@ -9,6 +9,6 @@ :synopsis: ops for performing various forms of downsampling .. moduleauthor:: LISA -.. note:: +.. note:: - This module is deprecated. Use the functions in :func:`theano.tensor.nnet.signal.pool` + This module is deprecated. Use the functions in :func:`aesara.tensor.nnet.signal.pool` diff --git a/doc/library/tensor/signal/pool.txt b/doc/library/tensor/signal/pool.txt index 6d7a808948..ae81c0156f 100644 --- a/doc/library/tensor/signal/pool.txt +++ b/doc/library/tensor/signal/pool.txt @@ -9,8 +9,8 @@ :synopsis: ops for performing various forms of downsampling .. moduleauthor:: LISA -.. seealso:: :func:`theano.tensor.nnet.neighbours.images2neibs` +.. seealso:: :func:`aesara.tensor.nnet.neighbours.images2neibs` -.. autofunction:: theano.tensor.signal.pool.pool_2d -.. autofunction:: theano.tensor.signal.pool.max_pool_2d_same_size -.. autofunction:: theano.tensor.signal.pool.pool_3d +.. autofunction:: aesara.tensor.signal.pool.pool_2d +.. autofunction:: aesara.tensor.signal.pool.max_pool_2d_same_size +.. autofunction:: aesara.tensor.signal.pool.pool_3d diff --git a/doc/library/tensor/slinalg.txt b/doc/library/tensor/slinalg.txt index 47cf375ac5..c85372f30a 100644 --- a/doc/library/tensor/slinalg.txt +++ b/doc/library/tensor/slinalg.txt @@ -1,4 +1,4 @@ -.. ../../../../theano/sandbox/slinalg.py +.. ../../../../aesara/sandbox/slinalg.py .. _libdoc_slinalg: @@ -18,7 +18,7 @@ API === -.. automodule:: theano.tensor.slinalg +.. automodule:: aesara.tensor.slinalg :members: :exclude-members: solve, solve_lower_triangular, solve_upper_triangular diff --git a/doc/library/tensor/utils.txt b/doc/library/tensor/utils.txt index 92cd995265..e8ca6f3648 100644 --- a/doc/library/tensor/utils.txt +++ b/doc/library/tensor/utils.txt @@ -4,13 +4,12 @@ .. testsetup:: - from theano.tensor.utils import * + from aesara.tensor.utils import * .. module:: tensor.utils :platform: Unix, Windows :synopsis: Tensor Utils .. moduleauthor:: LISA -.. automodule:: theano.tensor.utils +.. 
automodule:: aesara.tensor.utils :members: - diff --git a/doc/library/typed_list.txt b/doc/library/typed_list.txt index a601fa1561..cb246cad4a 100644 --- a/doc/library/typed_list.txt +++ b/doc/library/typed_list.txt @@ -10,18 +10,18 @@ .. note:: - This works, but is not well integrated with the rest of Theano. If + This works, but is not well integrated with the rest of Aesara. If speed is important, it is probably better to pad to a dense tensor. -This is a type that represents a list in Theano. All elements must have -the same Theano type. Here is an example: +This is a type that represents a list in Aesara. All elements must have +the same Aesara type. Here is an example: ->>> import theano.typed_list ->>> tl = theano.typed_list.TypedListType(theano.tensor.fvector)() ->>> v = theano.tensor.fvector() ->>> o = theano.typed_list.append(tl, v) ->>> f = theano.function([tl, v], o) +>>> import aesara.typed_list +>>> tl = aesara.typed_list.TypedListType(aesara.tensor.fvector)() +>>> v = aesara.tensor.fvector() +>>> o = aesara.typed_list.append(tl, v) +>>> f = aesara.function([tl, v], o) >>> f([[1, 2, 3], [4, 5]], [2]) [array([ 1., 2., 3.], dtype=float32), array([ 4., 5.], dtype=float32), array([ 2.], dtype=float32)] @@ -29,15 +29,15 @@ A second example with Scan. Scan doesn't yet have direct support of TypedList, so you can only use it as non_sequences (not in sequences or as outputs): ->>> import theano.typed_list ->>> a = theano.typed_list.TypedListType(theano.tensor.fvector)() ->>> l = theano.typed_list.length(a) ->>> s, _ = theano.scan(fn=lambda i, tl: tl[i].sum(), +>>> import aesara.typed_list +>>> a = aesara.typed_list.TypedListType(aesara.tensor.fvector)() +>>> l = aesara.typed_list.length(a) +>>> s, _ = aesara.scan(fn=lambda i, tl: tl[i].sum(), ... non_sequences=[a], -... sequences=[theano.tensor.arange(l, dtype='int64')]) ->>> f = theano.function([a], s) +... sequences=[aesara.tensor.arange(l, dtype='int64')]) +>>> f = aesara.function([a], s) >>> f([[1, 2, 3], [4, 5]]) array([ 6., 9.], dtype=float32) -.. automodule:: theano.typed_list.basic +.. automodule:: aesara.typed_list.basic :members: diff --git a/doc/links.txt b/doc/links.txt index d09f9959ba..54a944dd86 100644 --- a/doc/links.txt +++ b/doc/links.txt @@ -8,13 +8,13 @@ Links This page lists links to various resources. -Theano requirements +Aesara requirements ------------------- - git_: A distributed revision control system (RCS). - pytest_: A system for unit testing. - numpy_: A library for efficient numerical computing. -- python_: The programming language Theano is for. +- python_: The programming language in which Aesara is written. - scipy_: A library for scientific computing. diff --git a/doc/nextml2015/Makefile b/doc/nextml2015/Makefile deleted file mode 100644 index 4f24a8df70..0000000000 --- a/doc/nextml2015/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -presentation.pdf: presentation.tex - pdflatex presentation.tex - pdflatex presentation.tex - -clean: - rm *.toc *.snm *.aux *.log *.nav *.out *.vrb diff --git a/doc/nextml2015/presentation.tex b/doc/nextml2015/presentation.tex deleted file mode 100644 index a281ed5ec6..0000000000 --- a/doc/nextml2015/presentation.tex +++ /dev/null @@ -1,1158 +0,0 @@ -\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer} -\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! 
-\setbeamertemplate{footline}[page number] -\usecolortheme{beaver} -\usepackage[T1]{fontenc} -\usepackage{amsmath} -\usepackage[utf8x]{inputenc} -%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} -\usepackage{listings} - -\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} - -\mode - -\title{Theano and LSTM for Sentiment Analysis} - -\author{% -\footnotesize -Frédéric Bastien \newline -Département d'Informatique et de Recherche Opérationnelle \newline -Université de Montréal \newline -Montréal, Canada \newline -\texttt{bastienf@iro.umontreal.ca} \newline \newline -Presentation prepared with Pierre Luc Carrier, KyungHyun Cho and \newline - Çağlar Gülçehre -} - -\date{Next.ML 2015} - -\setbeamertemplate{navigation symbols}{} -\begin{document} - -\begin{frame}[plain] - \titlepage - \vspace{-5em} - \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png} - \hfill - \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop} -\end{frame} - -\section{Introduction} -\begin{frame} - \frametitle{Task} - -This is a classification task where, given a review of a movie, we -need to establish whether the movie review is positive or negative. - -We use the IMDB dataset. -\end{frame} - -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - - -\begin{frame}{High level}\setcounter{page}{1} - Python <- \{NumPy/SciPy/libgpuarray\} <- Theano <- Pylearn2 - \begin{itemize} - \item Python: OO coding language - \item Numpy: $n$-dimensional array object and scientific computing toolbox - \item SciPy: sparse matrix objects and more scientific computing functionality - \item libgpuarray: GPU $n$-dimensional array object in C for CUDA and OpenCL - \item Theano: compiler/symbolic graph manipulation - \item Pylearn2: machine learning framework for researchers - \end{itemize} -\end{frame} - -%% \begin{frame}{Others} -%% \begin{itemize} -%% \item IPython: Advanced python shell -%% \item IPython notebook: web-based interactive computational environment where you can combine code execution, text, mathematics, plots and rich media into a single document -%% \item matplotlib: one of the many plotting library -%% \item PyTables: hdf5 container with extra functionality -%% \item pandas: other data structure -%% \item ... -%% \end{itemize} -%% \end{frame} - -\begin{frame}{Python} - \begin{itemize} - \item General-purpose high-level OO interpreted language - \item Emphasizes code readability - \item Comprehensive standard library - \item Dynamic type and memory management - \item Slow execution - \item Easily extensible with C - \item Popular in {\em web development}\ and {\em scientific communities} - \end{itemize} -\end{frame} - -\begin{frame}{NumPy/SciPy} - \begin{itemize} - \item Python floats are full-fledged objects on the heap - \begin{itemize} - \item Not suitable for high-performance computing! 
- \end{itemize} - - \item NumPy provides an $n$-dimensional numeric array in Python - \begin{itemize} - \item Perfect for high-performance computing - \item Slices of arrays are views (no copying) - \end{itemize} - - \item NumPy provides - \begin{itemize} - \item Elementwise computations - \item Linear algebra, Fourier transforms - \item Pseudorandom number generators (many distributions) - \end{itemize} - - \item SciPy provides lots more, including - \begin{itemize} - \item Sparse matrices - \item More linear algebra - \item Solvers and optimization algorithms - \item Matlab-compatible I/O - \item I/O and signal processing for images and audio - \end{itemize} - \end{itemize} -\end{frame} - -\begin{frame}{What's missing?} - \begin{itemize} - \item Non-lazy evaluation (required by Python) hurts performance - \item Bound to the CPU - \item Lacks symbolic or automatic differentiation - \item No automatic speed and stability optimization - \end{itemize} - -\end{frame} - -\begin{frame}{Goal of the stack} -\begin{center} -\begin{bf}Fast to develop\end{bf}\newline \bigskip -\begin{bf}Fast to run\end{bf}\newline \bigskip -\hspace{-2.5cm} -\includegraphics[width=0.35\textwidth]{../omlw2014/road-runner-1.jpg} -\end{center} -\end{frame} - - -\section{Theano} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{Description} - - High-level domain-specific language for numeric computation. - - \begin{itemize} - \item Syntax as close to NumPy as possible - \item Compiles most common expressions to C for CPU and/or GPU - \item Limited expressivity means more opportunities for optimizations - \begin{itemize} - \item No subroutines -> global optimization - \item Strongly typed -> compiles to C - \item Array oriented -> easy parallelism - \item Support for looping and branching in expressions - \end{itemize} - \item Automatic speed and stability optimizations - \item Can reuse other technologies for best performance. - \begin{itemize} - \item BLAS, SciPy, Cython, Numba, PyCUDA, CUDA, ... - \end{itemize} - \item Automatic differentiation and R op - \item Sparse matrices (CPU only) - \item Extensive unit-testing and self-verification - \item Works on Linux, OS X and Windows - \end{itemize} -\end{frame} - - - -%% \begin{frame}{Why scripting for GPUs?} -%% \begin{bf}They complement each other\end{bf} - -%% GPUs are everything that high level languages are not - -%% \begin{itemize} -%% \item Highly parallel -%% \item Very architecture-sensitive -%% \item Built for maximum FP/memory throughput -%% \item So hard to program that meta-programming is easier -%% \end{itemize} - -%% \begin{bf}Best of both worlds:\end{bf} easily scripted code which invokes high-performance GPU kernels. - -%% \begin{bf}Theano C code generation removes overhead\end{bf} of -%% function calls between Python and C by launching many C functions at once. 
- -%% \end{frame} - -\begin{frame}{Project status?} - \begin{itemize} - \item Mature: Theano has been developed and used since January 2008 (7 yrs old) - \item Driven hundreds research papers - \item Good user documentation - \item Active mailing list with participants from outside our lab - \item Core technology for a few Silicon-Valley start-ups - \item Many contributors (some from outside our lab) - \item Used to teach many university classes - \item Has been used for research at big compagnies - \end{itemize} - Theano: \url{deeplearning.net/software/theano/} - - Deep Learning Tutorials: \url{deeplearning.net/tutorial/} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Simple example} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -import theano -# declare symbolic variable -a = theano.tensor.vector("a") - -# build symbolic expression -b = a + a ** 10 - -# compile function -f = theano.function([a], b) - -# Execute with numerical value -print f([0, 1, 2]) -# prints `array([0, 2, 1026])` -\end{lstlisting} -\end{frame} - -\begin{frame}{Simple example} -\center -\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_unoptimized.png} -\hspace{0.1\textwidth} -\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_optimized.png} -\end{frame} - - -%% \begin{frame}{Overview of Library} -%% Theano is many things -%% \begin{itemize} -%% \item Language -%% \item Compiler -%% \item Python library -%% \end{itemize} -%% \end{frame} - -\begin{frame}{Overview Language} - \begin{itemize} - \item Operations on scalar, vector, matrix, tensor, and sparse variables - \item Linear algebra - \item Element-wise nonlinearities - \item Convolution - \item Indexing, slicing and advanced indexing. 
- \item Reduction - \item Dimshuffle (n-dim transpose) - \item Extensible - \end{itemize} -\end{frame} - - -\begin{frame}[fragile] - \frametitle{Scalar math} -Some example of scalar operations: -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -import theano -from theano import tensor as tt -x = tt.scalar() -y = tt.scalar() -z = x+y -w = z*x -a = tt.sqrt(w) -b = tt.exp(a) -c = a ** b -d = tt.log(c) -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Vector math} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -from theano import tensor as tt -x = tt.vector() -y = tt.vector() -# Scalar math applied elementwise -a = x * y -# Vector dot product -b = tt.dot(x, y) -# Broadcasting (as NumPy, very powerful) -c = a + b -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Matrix math} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -from theano import tensor as tt -x = tt.matrix() -y = tt.matrix() -a = tt.vector() -# Matrix-matrix product -b = tt.dot(x, y) -# Matrix-vector product -c = tt.dot(x, a) -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Tensors} - Using Theano: - \begin{itemize} - \item Dimensionality defined by length of ``broadcastable'' argument - \item Can add (or do other elemwise op) on two - tensors with same dimensionality - \item Duplicate tensors along broadcastable axes to make size match - \end{itemize} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -from theano.tensor.type import TensorType - -tensor3 = TensorType( - broadcastable=(False, False, False), - dtype='float32') -x = tensor3() -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Reductions} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -from theano.tensor.type import TensorType - -tensor3 = TensorType( - broadcastable=(False, False, False), - dtype='float32') -x = tensor3() -total = x.sum() -marginals = x.sum(axis=(0, 2)) -mx = x.max(axis=1) -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Dimshuffle} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -from theano.tensor.type import TensorType - -tensor3 = TensorType( - broadcastable=(False, False, False)) -x = tensor3() -y = x.dimshuffle((2, 1, 0)) -a = tt.matrix() -b = a.tt -# Same as b -c = a.dimshuffle((0, 1)) -# Adding to larger tensor -d = a.dimshuffle((0, 1, 'x')) -e = a + d -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Indexing} - As NumPy! - This mean all slices, index selection return view -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -# return views, supported on GPU -a_tensor[int] -a_tensor[int, int] -a_tensor[start:stop:step, start:stop:step] -a_tensor[::-1] # reverse the first dimension - -# Advanced indexing, return copy -a_tensor[an_index_vector] # Supported on GPU -a_tensor[an_index_vector, an_index_vector] -a_tensor[int, an_index_vector] -a_tensor[an_index_tensor, ...] 
-\end{lstlisting} -\end{frame} - -\subsection{Compiling/Running} -\begin{frame}{Compiling and running expression} - \begin{itemize} - \item theano.function - \item shared variables and updates - \item compilation modes - \item compilation for GPU - \item optimizations - \end{itemize} -\end{frame} - -\begin{frame}[fragile] - \frametitle{theano.function} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} ->>> from theano import tensor as tt ->>> x = tt.scalar() ->>> y = tt.scalar() ->>> from theano import function ->>> # first arg is list of SYMBOLIC inputs ->>> # second arg is SYMBOLIC output ->>> f = function([x, y], x + y) ->>> # Call it with NUMERICAL values ->>> # Get a NUMERICAL output ->>> f(1., 2.) -array(3.0) -\end{lstlisting} -\end{frame} - -\begin{frame}{Shared variables} - \begin{itemize} - \item It’s hard to do much with purely functional programming - \item ``shared variables'' add just a little bit of imperative programming - \item A ``shared variable'' is a buffer that stores a numerical value for a Theano variable - \item Can write to as many shared variables as you want, once each, at the end of the function - \item Modify outside Theano function with get\_value() and set\_value() methods. - \end{itemize} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Shared variable example} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} ->>> from theano import shared ->>> x = shared(0.) ->>> from theano.compat.python2x import OrderedDict ->>> updates = OrderedDict() ->>> updates[x] = x + 1 ->>> f = function([], updates=updates) ->>> f() ->>> x.get_value() -1.0 ->>> x.set_value(100.) ->>> f() ->>> x.get_value() -101.0 -\end{lstlisting} -\end{frame} - -\begin{frame}{Which dict?} - \begin{itemize} - \item Use theano.compat.python2x.OrderedDict - \item Not collections.OrderedDict - \begin{itemize} - \item This isn’t available in older versions of python - \end{itemize} - \item Not \{\} aka dict - \begin{itemize} - \item The iteration order of this built-in class is not - deterministic (thanks, Python!) so if Theano - accepted this, the same script could compile - different C programs each time you run it - \end{itemize} - \end{itemize} -\end{frame} - -\begin{frame}{Compilation modes} - \begin{itemize} - \item Can compile in different modes to get different kinds of programs - \item Can specify these modes very precisely with arguments to theano.function - \item Can use a few quick presets with environment variable flags - \end{itemize} -\end{frame} - -\begin{frame}{Example preset compilation modes} - \begin{itemize} - \item FAST\_RUN: default. Fastest execution, slowest compilation - \item FAST\_COMPILE: Fastest compilation, slowest execution. No C code. - \item DEBUG\_MODE: Adds lots of checks. -Raises error messages in situations other -modes regard as fine. - \item optimizer=fast\_compile: as mode=FAST\_COMPILE, but with C code. 
- \item theano.function(..., mode=``FAST\_COMPILE'') - \item THEANO\_FLAGS=mode=FAST\_COMPILE python script.py - \end{itemize} -\end{frame} - -\begin{frame}{Compilation for GPU} - \begin{itemize} - \item Theano current back-end only supports 32 bit on GPU - \item libgpuarray (new-backend) support all dtype - \item CUDA supports 64 bit, but is slow on gamer GPUs - \item tt.fscalar, tt.fvector, tt.fmatrix are all 32 bit - \item tt.scalar, tt.vector, tt.matrix resolve to 32 bit or 64 bit depending on theano’s floatX flag - \item floatX is float64 by default, set it to float32 - \item Set device flag to gpu (or a specific gpu, like gpu0) - \item Flag: warn\_float64={'ignore', 'warn', 'raise', 'pdb'} - \end{itemize} -\end{frame} - -\subsection{Modifying expressions} -\begin{frame}{Modifying expressions} - \begin{itemize} - \item The grad method - \item Others - -% \item Variable nodes -% \item Types -% \item Ops -% \item Apply nodes - \end{itemize} -\end{frame} - -\begin{frame}[fragile] - \frametitle{The grad method} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} ->>> x = tt.scalar('x') ->>> y = 2. * x ->>> g = tt.grad(y, x) -# Print the not optimized graph ->>> theano.printing.pydotprint(g) -\end{lstlisting} -\includegraphics[width=0.75\textwidth]{theano_grad.png} -\end{frame} - -%% \begin{frame}{Theano Variables} -%% \begin{itemize} -%% \item A Variable is a theano expression -%% \item Can come from tt.scalar, tt.matrix, etc. -%% \item Can come from doing operations on other Variables -%% \item Every Variable has a type field, identifying its Type \newline -%% e.g. TensorType((True, False), ‘float32’) -%% \item Variables can be thought of as nodes in a graph -%% \end{itemize} -%% \end{frame} - -%% \begin{frame}{Ops} - -%% \begin{itemize} -%% \item An Op is any class that describes a -%% mathematical function of some variables -%% \item Can call the op on some variables to get a -%% new variable or variables -%% \item An Op class can supply other forms of -%% information about the function, such as its -%% derivatives -%% \end{itemize} -%% \end{frame} - -%% \begin{frame}{Apply nodes} -%% \begin{itemize} -%% \item The Apply class is a specific instance of an application of an Op -%% \item Notable fields: -%% \begin{itemize} -%% \item op: The Op to be applied -%% \item inputs: The Variables to be used as input -%% \item outputs: The Variables produced -%% \end{itemize} -%% \item Variable.owner identifies the Apply that created the variable -%% \item Variable and Apply instances are nodes and owner/ -%% inputs/outputs identify edges in a Theano graph -%% \end{itemize} -%% \end{frame} - -\begin{frame}{Others} - \begin{itemize} - \item R\_op, L\_op for hessian free - \item hessian - \item jacobian - \item you can navigate the graph if you need - (go from the result of computation to its input, recursively) - \end{itemize} -\end{frame} - -\subsection{Debugging} -\begin{frame}{Debugging} - \begin{itemize} - \item DEBUG\_MODE - \item Error message - \item theano.printing.debugprint - \end{itemize} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Error message: code} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -import numpy as np -import theano -import theano.tensor as tt -x = tt.vector() -y = tt.vector() -z = x + x -z = z + y -f = theano.function([x, y], z) -f(np.ones((2,)), np.ones((3,))) -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - 
\frametitle{Error message: 1st part} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - basicstyle=\scriptsize - } -\begin{lstlisting} -Traceback (most recent call last): -[...] -ValueError: Input dimension mis-match. - (input[0].shape[0] = 3, input[1].shape[0] = 2) -Apply node that caused the error: - Elemwise{add,no_inplace}(, - , - ) -Inputs types: [TensorType(float64, vector), - TensorType(float64, vector), - TensorType(float64, vector)] -Inputs shapes: [(3,), (2,), (2,)] -Inputs strides: [(8,), (8,), (8,)] -Inputs scalar values: ['not scalar', 'not scalar', 'not scalar'] -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Error message: 2st part} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - basicstyle=\footnotesize - } -\begin{lstlisting} -HINT: Re-running with most Theano optimization -disabled could give you a back-traces when this -node was created. This can be done with by setting -the Theano flags optimizer=fast_compile -HINT: Use the Theano flag 'exception_verbosity=high' -for a debugprint of this apply node. -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Error message: exception\_verbosity=high} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - basicstyle=\scriptsize, - xleftmargin=-1em - } -\begin{lstlisting} -Debugprint of the apply node: -Elemwise{add,no_inplace} [@A] '' - | [@B] - | [@C] - | [@C] -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Error message: optimizer=fast\_compile} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -Backtrace when the node is created: - File "test.py", line 7, in - z = z + y - -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Error message: Traceback} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - basicstyle=\footnotesize, - xleftmargin=-1em - } -\begin{lstlisting} -Traceback (most recent call last): - File "test.py", line 9, in - f(np.ones((2,)), np.ones((3,))) - File "/u/bastienf/repos/theano/compile/function/types.py", - line 589, in __call__ - self.fn.thunks[self.fn.position_of_error]) - File "/u/bastienf/repos/theano/compile/function/types.py", - line 579, in __call__ - outputs = self.fn() - -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{debugprint} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} ->>> from theano.printing import debugprint ->>> debugprint(a) -Elemwise{mul,no_inplace} [@A] '' - |TensorConstant{2.0} [@B] - |Elemwise{add,no_inplace} [@C] 'z' - | [@D] - | [@E] -\end{lstlisting} -\end{frame} - -%% \begin{frame}{Pylearn2} - -%% Machine Learning library aimed at researchers - -%% \begin{itemize} -%% \item Built on top of Theano, for fast execution and use of GPU -%% \item Easy to try variants of implemented algorithms, and to extend them (using Theano) -%% \item Very modular, each component of the library can be used in isolation -%% \item Experiments can be specified through a YAML config file, or by a Python script -%% \item Scripts for visualizing weights, plot monitored values -%% \end{itemize} -%% \end{frame} - - -%% \begin{frame}{libgpuarray} -%% Goal: A common GPU $n$-dimensional array that can be reused by all projects, support for both CUDA and OpenCL. 
-%% \newline \newline -%% Motivation: -%% \begin{itemize} -%% \item Currently there are at least 6 different GPU arrays in Python -%% \begin{itemize} -%% \item CudaNdarray (Theano), GPUArray (pycuda), CUDAMatrix (cudamat), GPUArray (pyopencl), Clyther, Copperhead, ... -%% \item There are even more if we include other languages. -%% \end{itemize} -%% \item They are incompatible -%% \begin{itemize} -%% \item None have the same properties and interface -%% \end{itemize} -%% \item All of them implement a subset of numpy.ndarray properties -%% \item This is the new GPU backend on Theano -%% \end{itemize} -%% \end{frame} - - -%% \begin{frame}{Project status?} -%% \begin{itemize} -%% \item Usable directly, but not all implementation available. -%% \item Multiple GPUs works. -%% \item Is the next GPU array container for Theano and is working. -%% \begin{itemize} -%% \item Not all Theano implementations available now. -%% \item OpenCL misses more implementations. -%% \item Multiple GPUs: supported in libgpuarray -%% \item Multiple GPUs: close to get integrated in Theano. -%% \end{itemize} -%% \item Web site: \url{http://deeplearning.net/software/libgpuarray/} -%% \end{itemize} -%% \end{frame} - -%% \section{libgpuarray} -%% \begin{frame} -%% \tableofcontents[currentsection] -%% \end{frame} -%% %TODO, make much shorter -%% \begin{frame}{libgpuarray: Design Goals} -%% \begin{itemize} -%% \item Have the base object in C to allow collaboration with more projects. -%% \begin{itemize} -%% \item We want people from C, C++, ruby, R, \ldots all use the same base GPU ndarray. -%% \end{itemize} -%% \item Be compatible with CUDA and OpenCL. -%% \item Not too simple, (don’t support just matrix). -%% \item Support all dtype. -%% \item Allow strided views. -%% \item But still easy to develop new code that support only a few memory layout. -%% \begin{itemize} -%% \item This ease the development of new code. -%% \end{itemize} -%% \end{itemize} -%% \end{frame} - -\subsection{Scan} -\begin{frame} - \frametitle{Scan} -\begin{itemize} -\item Allows looping (for, map, while) -\item Allows recursion (reduce) -\item Allows recursion with dependency on many of the previous time steps -\item Optimize some cases like moving computation outside of scan -\item The Scan grad is done via Backpropagation Through Time(BPTT) -\end{itemize} -\end{frame} - -\begin{frame}{When not to use scan} -\begin{itemize} -\item If you only need it for ``vectorization'' or - ``broadcasting''. tensor and numpy.ndarray support them - natively. This will be much better for that use case. - -\item If you do a fixed number of iteration that is very small (2,3). You - are probably better to just unroll the graph to do it. 
- -\end{itemize} -\end{frame} - - -\begin{frame}[fragile,allowframebreaks] - \frametitle{Scan Example1: Computing tanh(v.dot(W) + b) * d where b is binomial} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - basicstyle=\footnotesize - } -\begin{lstlisting} -import theano -import theano.tensor as tt -import numpy as np - -# define tensor variables -W = tt.matrix("W") -X = tt.matrix("X") -b_sym = tt.vector("b_sym") - -# define shared random stream -trng = theano.tensor.random.utils.RandomStream(1234) -d=trng.binomial(size=W[1].shape) -\end{lstlisting} -\end{frame} - - -\begin{frame}[fragile] - \frametitle{Scan Example1: Computing tanh(v.dot(W) + b) * d where d is binomial (2)} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -results, updates = theano.scan( - lambda v: tt.tanh(tt.dot(v, W) + b_sym) * d, - sequences=X) -f = theano.function(inputs=[X, W, b_sym], - outputs=[results], - updates=updates) -x = np.eye(10, 2, dtype=theano.config.floatX) -w = np.ones((2, 2), dtype=theano.config.floatX) -b = np.ones((2), dtype=theano.config.floatX) - -print f(x, w, b) -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Scan Example2: Computing pow(A, k)} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -import theano -import theano.tensor as tt -theano.config.warn__subtensor_merge_bug = False - -k = tt.iscalar("k") -A = tt.vector("A") - -def inner_fct(prior_result, B): - return prior_result * B -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Scan Example2: Computing pow(A, k) (2)} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -result, updates = theano.scan( - fn=inner_fct, - outputs_info=T.ones_like(A), - non_sequences=A, n_steps=k) - -# Scan provide us with A ** 1 through A ** k. -# Keep only the last value. Scan optimize memory. -final = result[-1] - -power = theano.function(inputs=[A, k], outputs=final, - updates=updates) -print power(range(10), 2) -#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.] -\end{lstlisting} -\end{frame} - -\begin{frame}[fragile] - \frametitle{Scan signature} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -result, updates = theano.scan( - fn=inner_fct, - sequences=[] - outputs_info=[tt.ones_like(A)], - non_sequences=A, - n_steps=k) -\end{lstlisting} - -\begin{itemize} -\item Updates are needed if there are random numbers generated in the inner function -\begin{itemize} -\item Pass them to the call theano.function(..., updates=updates) -\end{itemize} -\item The inner function of scan takes arguments like this: - scan: sequences, outputs\_info, non sequences -\end{itemize} - -\end{frame} - - -\section{RNN} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame} - \frametitle{Recurrent Neural Network Overview} -\begin{itemize} -\item RNN is a class of neural network that allows to work with sequences of variable sizes. -\item Some layers have recurrent connections to themselves with a time delay. - \begin{itemize} - \item This create an internal state that allows to exhibit dynamic temporal behavior. 
- \end{itemize} -\end{itemize} -Image from wikipedia by Fyedernoggersnodden -\includegraphics[width=0.35\textwidth]{../images/Elman_srnn.png} -\end{frame} - -\section{LSTM} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame} - \frametitle{Motivation} - -The RNN gradient signal ends up being multiplied a large number of -times (up to as many as the number of timesteps) by the transition -matrix (the matrix containing the weights of the recurrent -connections. This means that, the magnitude of the weights in the -transition matrix can have a strong impact on the learning process. - -\begin{itemize} -\item \begin{bf}vanishing gradients\end{bf} - If the weights in this matrix are small (or, more formally, if the leading eigenvalue of the weight matrix is smaller than 1.0). -\item \begin{bf}exploding gradients\end{bf} If the weights in this matrix are large (or, again, more formally, if the leading eigenvalue of the weight matrix is larger than 1.0), -\end{itemize} -\end{frame} - -\begin{frame} - \frametitle{History} -\begin{itemize} -\item Original version introduced in 1997 by Hochreiter, S., \& Schmidhuber, J. -\item Forget gate introduced in 2000 by Gers, F. A., Schmidhuber, J., \& Cummins, F. -\item All people we know use forget gate now. -\end{itemize} -\end{frame} - -\begin{frame} - \frametitle{LSTM overview} -\includegraphics[width=0.75\textwidth]{../images/lstm.png} -\end{frame} - - -\begin{frame} - \frametitle{LSTM cell} -\includegraphics[width=0.75\textwidth]{../images/lstm_memorycell.png} -\end{frame} - -\begin{frame}[allowframebreaks] - \frametitle{LSTM math} -The equations on the next slide describe how a layer of memory cells is updated at every timestep t. - -In these equations : - -% 'm' has no special meaning here except being a size reference for the length of the label (and the spacing before the descriptions -\begin{description}[m] -\item[$x_t$] \hfill \\ -is the input to the memory cell layer at time t -\item[$W_i$, $W_f$, $W_c$, $W_o$, $U_i$, $U_f$, $U_c$, $U_o$ and $V_o$] \hfill \\ - are weight matrices -\item[$b_i$, $b_f$, $b_c$ and $b_o$] \hfill \\ -are bias vectors -\end{description} - -\framebreak - -First, we compute the values for $i_t$, the input gate, and $\widetilde{C_t}$ the candidate value for the states of the memory cells at time t : - -\begin{equation} -i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i) -\end{equation} -\begin{equation} -\widetilde{C_t} = tanh(W_c x_t + U_c h_{t-1} + b_c) -\end{equation} - -Second, we compute the value for $f_t$, the activation of the memory cells’ forget gates at time t : - -\begin{equation} -f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f) -\end{equation} - -\framebreak - -Given the value of the input gate activation $i_t$, the forget gate activation $f_t$ and the candidate state value $\widetilde{C_t}$, we can compute $C_t$ the memory cells’ new state at time $t$ : - -\begin{equation} -C_t = i_t * \widetilde{C_t} + f_t * C_{t-1} -\end{equation} - -With the new state of the memory cells, we can compute the value of their output gates and, subsequently, their outputs : - -\begin{equation} -o_t = \sigma(W_o x_t + U_o h_{t-1} + V_o C_t + b_1) -\end{equation} -\begin{equation} -h_t = o_t * tanh(C_t) -\end{equation} - -\end{frame} - -\begin{frame} - \frametitle{Tutorial LSTM} -The model we used in this tutorial is a variation of the standard LSTM model. In this variant, the activation of a cell’s output gate does not depend on the memory cell’s state $C_t$. 
This allows us to perform part of the computation more efficiently (see next slide, for details). This means that, in the variant we have implemented, there is no matrix $V_o$ and equation (5) is replaced by equation (7) : - -\begin{equation} -o_t = \sigma(W_o x_t + U_o h_{t-1} + b_1) -\end{equation} - -\end{frame} - -\begin{frame} - \frametitle{Implementation Note} -In the code included this tutorial, the equations (1), (2), (3) and (7) are performed in parallel to make the computation more efficient. This is possible because none of these equations rely on a result produced by the other ones. It is achieved by concatenating the four matrices $W_*$ into a single weight matrix W and performing the same concatenation on the weight matrices $U_*$ to produce the matrix U and the bias vectors $b_*$ to produce the vector b. Then, the pre-nonlinearity activations can be computed with : -\vspace{-1em} -\begin{equation*} -z = \sigma(W x_t + U h_{t-1} + b) -\end{equation*} -\vspace{-2em} % don't remove the blank line - -The result is then sliced to obtain the pre-nonlinearity activations for i, f, $\widetilde{C_t}$, and o and the non-linearities are then applied independently for each. -\end{frame} - -\begin{frame}{LSTM Tips For Training} -\begin{itemize} -\item Do not use SGD, but use something like adagrad or rmsprop. -\item Initialize any recurrent weights as orthogonal matrices (orth\_weights). This helps optimization. -\item Take out any operation that does not have to be inside "scan". - Theano does many cases, but not all. -\item Rescale (clip) the L2 norm of the gradient, if necessary. -\item You can use weight noise (try first with $dot(U_c+noise, h_{t-1})$). -\item You can use dropout at the output of the recurrent layer. -\end{itemize} -\end{frame} - -\section{Exercices} -\begin{frame}{Exercices} -\begin{itemize} - \item Theano exercice: Work through the ``0[1-4]*'' exercices (directory): - - Available at ``git~clone~https://github.com/abergeron/ccw\_tutorial\_theano.git''. - - \item Scan exercices: \url{http://deeplearning.net/software/theano/tutorial/loop.html\#exercise} - - \item Modif LSTM: Add the V\_o parameter and use it. - \item Modif LSTM: Reverse the input sequence and try it like that: - Sutskever-NIPS2014 (No solutions provided) - \item Modif LSTM: Add to have 2 LSTM layers. The new one take the - input in the reverse order. Then you concatenate the mean of the - outputs of both LSTM to the logistic regression. (No solutions provided) -\end{itemize} - -% I don't know how to fix this frame since it seems incomplete. - -Deep Learning Tutorial on LSTM: \url{http://deeplearning.net/tutorial/lstm.html} - -(It have the papers -\end{frame} - - -\begin{frame}{Acknowledgments} -\begin{itemize} -\item All people working or having worked at the LISA lab. -\item All Theano users/contributors -\item Compute Canada, RQCHP, NSERC, and Canada Research Chairs for providing funds or access to compute resources. -\end{itemize} -\end{frame} - -\begin{frame} -\begin{center} -\Huge -Questions? -\end{center} -\end{frame} - - -\end{document} diff --git a/doc/nextml2015/theano_grad.png b/doc/nextml2015/theano_grad.png deleted file mode 100644 index 1c13c7b711..0000000000 Binary files a/doc/nextml2015/theano_grad.png and /dev/null differ diff --git a/doc/nice_quotes.txt b/doc/nice_quotes.txt deleted file mode 100644 index 1395913535..0000000000 --- a/doc/nice_quotes.txt +++ /dev/null @@ -1,26 +0,0 @@ -:orphan: - -"Thank YOU for correcting it so quickly. 
I wish all packages I worked -with would have such an active maintenance - this is as good as it -gets :-)" --- Jan Antolik, [theano-users] strange behaviour, Mon, Aug 2, 2010 at 1:36 PM - -------------------------- - -"Theano rocks incredibly. It's like the holy grail of linear algebra -computations." - --- visionlessvisionary on reddit - -http://www.reddit.com/r/MachineLearning/comments/banhb/deep_learning_tutorial_learn_to_build_complex/c0lsvik - -------------------------- - -I am completely new to theano and after running the deep-learning -tutorial and see the examples actually work on my GTX 275 I must say I -am 100% sold on the theano approach; this is an amazing project that -deserves broad recognition among the scientific python community. - --- Olivier Grisel - - diff --git a/doc/omlw2014/Makefile b/doc/omlw2014/Makefile deleted file mode 100644 index 2c88ef91bc..0000000000 --- a/doc/omlw2014/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -all: presentation.pdf sharing.pdf - -clean: - rm -f pygpu_ndarray.so core.* *.o *~ - -cleantmp: - rm -f core.* *.o *~ - -presentation.pdf: presentation.tex - pdflatex presentation - pdflatex presentation - -sharing.pdf: sharing.tex - pdflatex sharing - pdflatex sharing diff --git a/doc/omlw2014/logreg.py b/doc/omlw2014/logreg.py deleted file mode 100644 index ae1fc1d91e..0000000000 --- a/doc/omlw2014/logreg.py +++ /dev/null @@ -1,45 +0,0 @@ - -import numpy as np -import theano -import theano.tensor as tt -rng = np.random - -N = 400 -feats = 784 -D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2)) -training_steps = 10000 - -# Declare Theano symbolic variables -x = tt.matrix("x") -y = tt.vector("y") -w = theano.shared(rng.randn(feats), name="w") -b = theano.shared(0., name="b") -print("Initial model:") -print(w.get_value(), b.get_value()) - -# Construct Theano expression graph -p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability that target = 1 -prediction = p_1 > 0.5 # The prediction thresholded -xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) # Cross-entropy loss -cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize -gw, gb = tt.grad(cost, [w, b]) - -# Compile -train = theano.function( - inputs=[x, y], - outputs=[prediction, xent], - updates=[(w, w - 0.1 * gw), - (b, b - 0.1 * gb)], - name='train') - -predict = theano.function(inputs=[x], outputs=prediction, - name='predict') - -# Train -for i in range(training_steps): - pred, err = train(D[0], D[1]) - -print("Final model:") -print(w.get_value(), b.get_value()) -print("target values for D:", D[1]) -print("prediction on D:", predict(D[0])) diff --git a/doc/omlw2014/omlw_presentation.pdf b/doc/omlw2014/omlw_presentation.pdf deleted file mode 100644 index cd777081dd..0000000000 Binary files a/doc/omlw2014/omlw_presentation.pdf and /dev/null differ diff --git a/doc/omlw2014/pr_conv_gemm_profile.png b/doc/omlw2014/pr_conv_gemm_profile.png deleted file mode 100644 index 10dd96c3a0..0000000000 Binary files a/doc/omlw2014/pr_conv_gemm_profile.png and /dev/null differ diff --git a/doc/omlw2014/presentation.tex b/doc/omlw2014/presentation.tex deleted file mode 100644 index 456d088371..0000000000 --- a/doc/omlw2014/presentation.tex +++ /dev/null @@ -1,464 +0,0 @@ -\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer} -\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! 
-\setbeamertemplate{footline}[page number] -\usecolortheme{beaver} -\usepackage[T1]{fontenc} -\usepackage{amsmath} -\usepackage[utf8x]{inputenc} -%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} -\usepackage{listings} - -\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} - -\mode - -\title{Theano, Pylearn2, libgpuarray Presentation} - -\author{% -\footnotesize -Frédéric Bastien, Bart van Merriënboer \newline -Département d'Informatique et de Recherche Opérationnelle \newline -Université de Montréal \newline -Montréal, Canada \newline -\texttt{\{bastienf, vanmerb\}@iro.umontreal.ca} \newline \newline -} - -\date{OML Workshop 2014} - -\setbeamertemplate{navigation symbols}{} - -\begin{document} - -\begin{frame}[plain] - \titlepage - \vspace{-5em} - \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png} - \hfill - \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop} -\end{frame} - -\section{Introduction} -\begin{frame}{High level}\setcounter{page}{1} - Python <- \{NumPy/SciPy/libgpuarray\} <- Theano <- Pylearn2 - \begin{itemize} - \item Python: OO coding language - \item Numpy: $n$-dimensional array object and scientific computing toolbox - \item SciPy: sparse matrix objects and more scientific computing functionality - \item libgpuarray: GPU $n$-dimensional array object in C for CUDA and OpenCL - \item Theano: compiler/symbolic graph manipulation - \item Pylearn2: machine learning framework - \end{itemize} -\end{frame} - - -%% \begin{frame}{Others} -%% \begin{itemize} -%% \item matplotlib: one of the many plotting library -%% \item IPython: Advanced python shell -%% \item IPython notebook: web-based interactive computational environment where you can combine code execution, text, mathematics, plots and rich media into a single document -%% \end{itemize} -%% \end{frame} - -\begin{frame}{Python} - \begin{itemize} - \item General-purpose high-level OO interpreted language - \item Emphasizes code readability - \item Comprehensive standard library - \item Dynamic type and memory management - \item Slow execution - \item Easily extensible with C - \item Popular in {\em web development}\ and {\em scientific communities} - \end{itemize} -\end{frame} - -\begin{frame}{NumPy/SciPy} - \begin{itemize} - \item Python floats are full-fledged objects on the heap - \begin{itemize} - \item Not suitable for high-performance computing! 
- \end{itemize} - - \item NumPy provides an $n$-dimensional numeric array in Python - \begin{itemize} - \item Perfect for high-performance computing - \item Slices of arrays are views (no copying) - \end{itemize} - - \item NumPy provides - \begin{itemize} - \item Elementwise computations - \item Linear algebra, Fourier transforms - \item Pseudorandom number generators (many distributions) - \end{itemize} - - \item SciPy provides lots more, including - \begin{itemize} - \item Sparse matrices - \item More linear algebra - \item Solvers and optimization algorithms - \item Matlab-compatible I/O - \item I/O and signal processing for images and audio - \end{itemize} - \end{itemize} -\end{frame} - -\begin{frame}{What's missing?} - \begin{itemize} - \item Non-lazy evaluation (required by Python) hurts performance - \item Bound to the CPU - \item Lacks symbolic or automatic differentiation - \item No automatic speed and stability optimization - \end{itemize} - -\end{frame} - -%% \begin{frame}{Why scripting for GPUs?} -%% \begin{bf}They complement each other\end{bf} - -%% GPUs are everything that high level languages are not - -%% \begin{itemize} -%% \item Highly parallel -%% \item Very architecture-sensitive -%% \item Built for maximum FP/memory throughput -%% \item So hard to program that meta-programming is easier -%% \end{itemize} - -%% \begin{bf}Best of both worlds:\end{bf} easily scripted code which invokes high-performance GPU kernels. - -%% \begin{bf}Theano C code generation removes overhead\end{bf} of -%% function calls between Python and C by launching many C functions at once. - -%% \end{frame} - -\begin{frame}{Theano} - - High-level domain-specific language tailored to numeric computation. - - \begin{itemize} - \item Syntax as close to NumPy as possible - \item Compiles most common expressions to C for CPU and/or GPU - \item Limited expressivity means more opportunities optimizations - \begin{itemize} - \item No subroutines -> global optimization - \item Strongly typed -> compiles to C - \item Array oriented -> easy parallelism - \item Support for looping and branching in expressions - \end{itemize} - \item Automatic speed and stability optimizations - \item Can reuse other technologies for best performance. - \begin{itemize} - \item BLAS, SciPy, Cython, Numba, PyCUDA, CUDA - \end{itemize} - \item Automatic differentiation and R op - \item Sparse matrices - \end{itemize} -\end{frame} - - -\begin{frame}{Pylearn2} - - Machine Learning library aimed at researchers - - \begin{itemize} - \item Built on top of Theano, for fast execution and use of GPU - \item Easy to try variants of implemented algorithms, and to extend them (using Theano) - \item Very modular, each component of the library can be used in isolation - \item Experiments can be specified through a YAML config file, or by a Python script - \item Scripts for visualizing weights, plot monitored values - \end{itemize} -\end{frame} - - -\begin{frame}{libgpuarray} - Goal: A common GPU $n$-dimensional array that can be reused by all projects, support for both CUDA and OpenCL. - \newline \newline - Motivation: - \begin{itemize} - \item Currently there are at least 6 different GPU arrays in Python - \begin{itemize} - \item CudaNdarray (Theano), GPUArray (pycuda), CUDAMatrix (cudamat), GPUArray (pyopencl), Clyther, Copperhead, ... - \item There are even more if we include other languages. 
- \end{itemize} - \item They are incompatible - \begin{itemize} - \item None have the same properties and interface - \end{itemize} - \item All of them implement a subset of numpy.ndarray properties - \item This is the new GPU backend on Theano - \end{itemize} -\end{frame} - - -\begin{frame}{Goal of the stack} -\begin{center} -\begin{bf}Fast to develop\end{bf}\newline \bigskip -\begin{bf}Fast to run\end{bf}\newline \bigskip -\hspace{-2.5cm} -\includegraphics[width=0.35\textwidth]{road-runner-1.jpg} -\end{center} -\end{frame} - - -\section{Theano} -% I think it is a good idea to make explicit the change into a new section -- PL -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{Description} - \begin{itemize} - \item Mathematical symbolic expression compiler - \item Expressions mimic NumPy's syntax and semantics - \item Dynamic C/CUDA code generation - \begin{itemize} - \item C/C++, CUDA, OpenCL, PyCUDA, Cython, Numba, \ldots - \end{itemize} - \item Efficient symbolic differentiation - %\begin{itemize} - % \item Derivatives of functions with one or many inputs. - % \item Computation of the Jacobian, Hessian, R and L op. - %\end{itemize} - \item Speed and stability optimizations - \begin{itemize} - \item Gives the right answer for ``$\log (1 + x)$'' even if $x$ is really tiny. - \end{itemize} - \item Extensive unit-testing and self-verification - %\begin{itemize} - % \item Detects and diagnoses many types of errors - %\end{itemize} - \item Works on Linux, OS X and Windows - \item Transparent use of a GPU - \begin{itemize} - \item {\tt float32} only for now (libgpuarray provides much more) - \item Limited support on Windows - \end{itemize} - -% \item Statically typed and purely functional - \item Sparse operations (CPU only) - \end{itemize} -\end{frame} - -% The following does not work with lstset, for some reason -%\begin{frame}{Simple example} -\begin{frame}[fragile] - \frametitle{Simple example} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } -\begin{lstlisting} -import theano -# declare symbolic variable -a = theano.tensor.vector("a") -# build symbolic expression -b = a + a ** 10 -# compile function -f = theano.function([a], b) -print f([0, 1, 2]) -# prints `array([0, 2, 1026])` -\end{lstlisting} -\end{frame} - -\begin{frame}{Simple example: graph optimization} -\center -\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_unoptimized.png} -\hspace{0.1\textwidth} -\includegraphics[width=0.35\textwidth]{../hpcs2011_tutorial/pics/f_optimized.png} -%Symbolic programming = *Paradigm shift*: people need to use it to understand it. - -\end{frame} - - -\begin{frame}{Project status?} - \begin{itemize} - \item Mature: Theano has been developed and used since January 2008 (6.5 yrs old) - \item Driven over 100 research papers - \item Good user documentation - \item Active mailing list with participants from outside our lab - \item Core technology for a few Silicon-Valley start-ups - \item Many contributors (some from outside our lab) - \item Used to teach many university classes - \item Has been used for research at Google and Yahoo. - \end{itemize} - Theano: \url{deeplearning.net/software/theano/} - - Deep Learning Tutorials: \url{deeplearning.net/tutorial/} -\end{frame} - - -\section{Pylearn2} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{Pylearn2 details} - The core library contains a collection of: - \begin{itemize} - \item Training algorithms (e.g. 
Stochastic and Batch GD, model-specific rules) - \begin{itemize} - \item Costs, supervised/unsupervised and exact/estimated (e.g. NLL, Score matching, NCE) - \item Monitor, history of (functions of) parameters and hyperparameters on different data sets (training, validation, test) - \item Termination criteria, determine when to stop training - \end{itemize} - \item Training extensions, perform actions throughout the training process (e.g., early stopping) - \item Models (e.g. NNets, ConvNets, RBMs, k-means, PCA, SVMs) - \item Datasets (e.g. MNIST, CIFAR-10) and preprocessors (LCN, ZCA) - \end{itemize} -\end{frame} - -\begin{frame}{Pylearn2 details, continued} -\begin{itemize} - \item Data specifications which give semantics to data - \begin{itemize} - \item IndexSpace, 1D integer array e.g.\ for labels - \item VectorSpace, 1D float array e.g.\ for softmax output - \item Conv2DSpace, 3D float32 arrays e.g.\ for color image input - \end{itemize} - \item Allows for automatic conversion when needed e.g.\ labels to one-hot vectors, images to flattened vectors - \item YAML file allows experiments to be conducted without writing code -\end{itemize} -\end{frame} - -\begin{frame}{Project status} - \begin{itemize} - \item Has been used for scientific publications, Kaggle competitions, used by many researchers at LISA - \item Still under rapid development, however the API shouldn't break without warning - \item Documentation is incomplete, but quickly improving - \item Active mailing list with participants from outside our lab - \item Core technology for a least one Silicon-Valley start-up - \item Features currently in development: - \begin{itemize} - \item Recurrent neural networks (RNNs), based on the GroundHog framework developed at LISA - \item Better hyperparameter search support, using e.g. Hyperopt - \end{itemize} - \end{itemize} -\end{frame} - -%% \begin{frame}[fragile] -%% \frametitle{Simple example} - -%% % I know it is not Python, but YAML is not supported by listings -%% % close enough? -- PL -%% \lstset{language=python, -%% commentstyle=\slshape\color{blue}, -%% stringstyle=\color{violet}, -%% basicstyle=\tiny\ttfamily} -%% \begin{lstlisting} -%% !obj:pylearn2.train.Train { -%% "dataset": !obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix &dataset { -%% "X" : !obj:numpy.random.normal { 'size': [5,3] }, -%% }, -%% "model": !obj:pylearn2.models.autoencoder.DenoisingAutoencoder { -%% "nvis" : 3, -%% "nhid" : 4, -%% "irange" : 0.05, # Interval from which to sample weights -%% "corruptor": !obj:pylearn2.corruption.BinomialCorruptor { -%% "corruption_level": 0.5, -%% }, -%% "act_enc": "tanh", -%% "act_dec": null, # Linear activation on the decoder side. 
-%% }, -%% "algorithm": !obj:pylearn2.training_algorithms.sgd.SGD { -%% "learning_rate" : 1e-3, -%% "batch_size" : 5, -%% "monitoring_dataset" : *dataset, -%% "cost" : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {}, -%% "termination_criterion" : !obj:pylearn2.termination_criteria.EpochCounter { -%% "max_epochs": 10, -%% }, -%% } -%% } -%% \end{lstlisting} -%% \end{frame} - -%% \begin{frame}[fragile] -%% \frametitle{Simple example} - -%% \lstset{language=python, -%% commentstyle=\itshape\color{blue}, -%% stringstyle=\color{violet}, -%% basicstyle=\small -%% } -%% \begin{lstlisting} -%% # Use Pylearn2 to perform a linear transformation -%% # followed by a softmax -%% x = theano.tensor.vector("x") -%% softmax = pylearn2.models.mlp.Softmax( -%% n_classes=2, layer_name="softmax", irange=0.05 -%% ) -%% softmax.set_input_space( -%% pylearn2.space.VectorSpace(dim=5) -%% ) -%% y = softmax.fprop(x) -%% f = theano.function([x], y) -%% print f([0.12, 0.12, 0.43, 0.32, 0.96]) -%% # prints [0.43, 0.54] -%% \end{lstlisting} -%% \end{frame} - -\section{libgpuarray} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{libgpuarray: Design Goals} - \begin{itemize} - \item Have the base object in C to allow collaboration with more projects. - \begin{itemize} - \item We want people from C, C++, ruby, R, \ldots all use the same base GPU ndarray. - \end{itemize} - \item Be compatible with CUDA and OpenCL. - \item Not too simple, (don’t support just matrix). - \item Support all dtype. - \item Allow strided views. - \item But still easy to develop new code that support only a few memory layout. - \begin{itemize} - \item This ease the development of new code. - \end{itemize} - \end{itemize} -\end{frame} - -\begin{frame}{Project status?} - \begin{itemize} - \item Usable directly, but not all implementation available. - \item Multiple GPUs works. - \item Is the next GPU array container for Theano and is working. - \begin{itemize} - \item Not all Theano implementations available now. - \item OpenCL misses more implementations. - \item Multiple GPUs on the way. - \end{itemize} - \item Web site: \url{http://deeplearning.net/software/libgpuarray/} - \end{itemize} -\end{frame} - -\section{Conclusion} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{Conclusion} -Theano/Pylearn2/libgpuarry provide an environment for machine learning that is: -\begin{bf}Fast to develop\end{bf}\newline -\begin{bf}Fast to run\end{bf}\newline -\end{frame} - -\begin{frame}{Acknowledgments} -\begin{itemize} -\item All people working or having worked at the LISA lab. -\item All Theano/Pylearn 2 users/contributors -\item Compute Canada, RQCHP, NSERC, and Canada Research Chairs for providing funds or access to compute resources. -\end{itemize} -\end{frame} - -\begin{frame} -\begin{center} -\Huge -Questions? -\end{center} -\end{frame} - - -\end{document} diff --git a/doc/omlw2014/road-runner-1.jpg b/doc/omlw2014/road-runner-1.jpg deleted file mode 100644 index 301a3d9849..0000000000 Binary files a/doc/omlw2014/road-runner-1.jpg and /dev/null differ diff --git a/doc/omlw2014/sharing.tex b/doc/omlw2014/sharing.tex deleted file mode 100644 index 215d7fc428..0000000000 --- a/doc/omlw2014/sharing.tex +++ /dev/null @@ -1,96 +0,0 @@ -\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer} -\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! 
-\setbeamertemplate{footline}[page number] -\usecolortheme{beaver} -\usepackage[T1]{fontenc} -\usepackage{amsmath} -\usepackage[utf8x]{inputenc} -%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} -\usepackage{listings} - -\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} - -\mode - -\title{Theano, Pylearn2, libgpuarray: Sharing and Future} - -\author{% -\footnotesize -Frédéric Bastien, Bart van Merriënboer \newline -Département d'Informatique et de Recherche Opérationnelle \newline -Université de Montréal \newline -Montréal, Canada \newline -\texttt{\{bastienf, vanmerb\}@iro.umontreal.ca} \newline \newline -} - -\date{OML Workshop 2014} - -\setbeamertemplate{navigation symbols}{} - -\begin{document} - -\begin{frame}[plain] - \titlepage - \vspace{-5em} - \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png} - \hfill - \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop} -\end{frame} - -\section{Future} -\begin{frame} - \tableofcontents[currentsection] -\end{frame} - -\begin{frame}{Theano}\setcounter{page}{1} -\begin{itemize} -\item Easier C code development and better documentation of that -\item Faster compilation -\item Multi-GPU -\item Better looping (update to scan) -\item Allow checkpoint with GPU to reload without GPU -\item Less memory allocation(lower Theano overhead) -\item Faster convolution -\end{itemize} -\end{frame} - -\begin{frame}{libgpuarray} -\begin{itemize} -\item Find other projects to use it? -\item More functionality as NumPy -\item Move some of the functionality from Python/Theano to the C level -\item Optimize the kernel selection and parametrization based on the GPU -\end{itemize} -\end{frame} - -\begin{frame}{Pylearn2} -\begin{itemize} -\item RNN -\item Better hyperparameter search support, using e.g. Hyperopt -\item Documentation -\item Checkpoint -\item Better support for sparse dataset -\item Machine translation examples -\item Gated activations for conditional computation -\item Variational Auto-Encoders -\end{itemize} -\end{frame} - -\begin{frame} -\end{frame} - -\begin{frame}{Simplifying code sharing between} -\begin{enumerate} - \item<1-> License: \begin{bf}Suggest BSD\end{bf} as it is used by many software in our field. - \begin{itemize} - \item Common license help share code. - \item When reusing code, don't forget to keep the license and the copyright notice - \end{itemize} - \item<2-> Common base object! \begin{bf}libgpuarray\end{bf} - \item<3-> Otherwise: put important implementation(e.g. convolution) in \begin{bf}separate file\end{bf} and \begin{bf}use raw ptr/shape/strides\end{bf} as inputs. Document that interface. - \item<4-> Acknowledge reuse \begin{bf}in section on web site\end{bf} AND \begin{bf}in papers\end{bf} about the software we reuse! (and use too) -\end{enumerate} -\end{frame} - - -\end{document} diff --git a/doc/optimizations.txt b/doc/optimizations.txt index 025e9ec1fc..448e7ee1c5 100644 --- a/doc/optimizations.txt +++ b/doc/optimizations.txt @@ -4,7 +4,7 @@ Optimizations ============== -Theano applies many kinds of graph optimizations, with different objectives: +Aesara applies many kinds of graph optimizations, with different objectives: * simplifying and standardizing the form of the expression graph (e.g. :term:`merge`, :term:`add canonicalization` ), * reducing the maximum memory footprint (e.g. :term:`inplace_elemwise`), * increasing execution speed (e.g. :term:`constant folding`). 
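As a minimal illustration of the merge rewrite named above, the sketch below (assuming only the standard ``aesara.function``, ``aesara.tensor``, and ``aesara.printing.debugprint`` entry points) writes ``x + y`` twice and lets the optimizer share the common sub-expression:

.. code-block:: python

    import aesara
    import aesara.tensor as at
    from aesara.printing import debugprint

    x = at.vector("x")
    y = at.vector("y")

    # "x + y" is written twice; the merge optimization ensures the sum
    # appears only once in the compiled graph, and exp is applied to
    # that shared result.
    z = at.exp(x + y) + (x + y)

    f = aesara.function([x, y], z)
    debugprint(f)  # the optimized graph contains a single Add node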
@@ -14,7 +14,7 @@ gives a quick summary of the optimizations included in the default modes. The descriptions are brief and point to further reading. If you would like to add an additional optimization, refer to -:ref:`optimization` in the guide to extending Theano. +:ref:`optimization` in the guide to extending Aesara. When compiling, we can make a tradeoff between compile-time and run-time. Faster compile times will result in fewer optimizations being applied, hence generally slower run-times. @@ -31,7 +31,7 @@ For an even faster run-time, we could disable assertions (which could be time co .. code-block:: bash - python -c "import theano; theano.compile.optdb.query(theano.compile.predefined_optimizers['']).print_summary()" + python -c "import aesara; aesara.compile.optdb.query(aesara.compile.predefined_optimizers['']).print_summary()" where can be one of o1 (:ref:`† `), o2, o3, o4 (:ref:`* `), Stabilization or unsafe. @@ -86,7 +86,7 @@ Optimization o4 o3 o2 optimization will ensure that ``x`` and ``y`` are only added once. This optimization is very useful because it frees users to write - highly redundant mathematical code. Theano will make sure to compute + highly redundant mathematical code. Aesara will make sure to compute just what is necessary. See :class:`MergeOptimizer`. @@ -98,7 +98,7 @@ Optimization o4 o3 o2 See :func:`opt.constant_folding` shape promotion - Theano often knows how to infer the shape of an output from the shape + Aesara often knows how to infer the shape of an output from the shape of its inputs. Without this optimization, it would otherwise have to compute things (e.g. ``log(x)``) just to find out the shape of it! @@ -131,7 +131,7 @@ Optimization o4 o3 o2 constant elimination Many constants indicate special cases, such as ``pow(x,1) -> x``. - Theano recognizes many of these special cases. + Aesara recognizes many of these special cases. See :func:`local_mul_specialize`, :func:`local_mul_specialize`,:func:`local_mul_specialize` @@ -164,7 +164,7 @@ Optimization o4 o3 o2 See :func:`local_dot_to_dot22` sparse_dot - Theano has a sparse matrix multiplication algorithm that is faster in + Aesara has a sparse matrix multiplication algorithm that is faster in many cases than scipy's (for dense matrix output). This optimization swaps scipy's algorithm for ours. @@ -272,7 +272,7 @@ Optimization o4 o3 o2 'float32' ndarray arguments, and these shared variables act as seeds for the greedy algorithm. - See :func:`theano.sandbox.cuda.opt.*`. + See :func:`aesara.sandbox.cuda.opt.*`. local_log_softmax This is a stabilization optimization. @@ -282,7 +282,7 @@ Optimization o4 o3 o2 local_remove_all_assert This is an unsafe optimization. - For the fastest possible Theano, this optimization can be enabled by + For the fastest possible Aesara, this optimization can be enabled by setting ``optimizer_including=local_remove_all_assert`` which will remove all assertions in the graph for checking user inputs are valid. Use this optimization if you are sure everything is valid in your graph. diff --git a/doc/proposals/complex_gradient.txt b/doc/proposals/complex_gradient.txt deleted file mode 100644 index e1686fc244..0000000000 --- a/doc/proposals/complex_gradient.txt +++ /dev/null @@ -1,191 +0,0 @@ -.. 
complex_grad_proposal: - -=========================================== -Proposal for gradient wrt complex variables -=========================================== - -This is a proposal to handle gradients of a scalar, real variable -(usually, a cost) with respect to tensor variables, of complex (and -real) type, in an optimization perspective. - -Derivative of complex variables is usually studied only for so-called -*analytical* complex functions, which have a particular structure in -their partial derivatives. However, we do not want to limit ourselves -to analytical functions, and we make other assumptions (that the final -cost is real-valued, for instance), so **we will adopt a different -convention** for gradients than what is usually used in the literature. - - -Gradient (re-)definition -======================== - -We are interested in the case where we have a final real-valued -cost, :math:`C`, and a graph of mathematical expressions, including -real-valued and complex-valued variables (scalars, vectors, matrices, -higher-order tensors), and we want to compute the gradient of :math:`C`, -wrt some variables in that graph, using gradient back-propagation. -In the case where some variables are complex, the usual chain rule -cannot be applied, except in some cases. - -For each real-valued variable :math:`r` (not necessarily scalar, -it could be a matrix, for instance), in particular :math:`\Re -v` and :math:`\Im v`, *partial derivatives* can be defined: -:math:`\frac{\partial C}{\partial r}` has the same number of dimensions -and shape as :math:`r`. We will limit that notation to real-valued -variables only, this way, the partial derivative itself will be -real-valued too. We will **not** use that notation for the complex -derivative of analytical complex functions. - -For any real-valued intermediate variable :math:`t`, the usual chain -rule applies: - -.. math:: - - \frac{\partial C}{\partial r} = \frac{\partial C}{\partial t} \frac{\partial t}{\partial r} - -If :math:`z` is a complex variable, with :math:`\Re z = x` and -:math:`\Im z = y`, we can consider :math:`x` and :math:`y` as free -variables, and then: - -.. math:: - - \frac{\partial C}{\partial r} = \frac{\partial C}{\partial x} \frac{\partial x}{\partial r} + \frac{\partial C}{\partial y} \frac{\partial y}{\partial r} - -If we want to use an algorithm similar to gradient backpropagation, -we can see that, here, we need to have both :math:`\frac{\partial -C}{\partial \Re t}` and :math:`\frac{\partial C}{\partial \Im t}`, in order -to compute :math:`\frac{\partial C}{\partial r}`. - -For each variable :math:`v` in the expression graph, let us denote -:math:`\nabla_C(v)` the *gradient* of :math:`C` with respect to -:math:`v`. It is a tensor with the same dimensions as :math:`v`, and can -be complex-valued. We define: - -.. math:: - - \nabla_C(v) = \frac{\partial C}{\partial \Re v} + i \frac{\partial C}{\partial \Im v} - -This is the tensor that we are going to back-propagate through the -computation graph. - - -Generalized chain rule -====================== - -Using the definition above, if we have two complex variables :math:`z = x + iy` and :math:`t = r + is` (with :math:`x, y, r, s` all real-valued): - -.. 
math:: - - \nabla_C(z) &= \frac{\partial C}{\partial \Re z} + i \frac{\partial C}{\partial \Im z} \\ - &= \frac{\partial C}{\partial x} + i \frac{\partial C}{\partial y} - - \nabla_C(t) &= \frac{\partial C}{\partial \Re t} + i \frac{\partial C}{\partial \Im t} \\ - &= \frac{\partial C}{\partial r} + i \frac{\partial C}{\partial s} \\ - &= \left(\frac{\partial C}{\partial x} \frac{\partial x}{\partial r} + - \frac{\partial C}{\partial y} \frac{\partial y}{\partial r}\right) + - i \left(\frac{\partial C}{\partial x} \frac{\partial x}{\partial s} + - \frac{\partial C}{\partial y} \frac{\partial y}{\partial s}\right) \\ - &= \frac{\partial C}{\partial x} \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) + - \frac{\partial C}{\partial y} \left(\frac{\partial y}{\partial r} + i \frac{\partial y}{\partial s}\right) \\ - &= \Re \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) + - \Im \left(\nabla_C(z)\right) \left(\frac{\partial y}{\partial r} + i \frac{\partial y}{\partial s}\right) - - -This formula can be used whether or not :math:`C` is an analytical -function of :math:`z` or :math:`t`, and whether or not :math:`z` is an -analytical function of :math:`t`. - - -Special cases -============= - -Real-valued input variable --------------------------- - -If variable :math:`x` is defined as real-valued, it can sometimes -be useful to have the value of :math:`\nabla_C(z)` instead of only -:math:`\frac{\partial C}{\partial x}`, because the imaginary part -contains information on how the cost would change if :math:`y` was not -constrained to be 0. - - -Real-valued intermediate variable ---------------------------------- - -When :math:`x` is an intermediate variable, however, the gradient of -:math:`C` wrt :math:`t` must not be backpropagated through :math:`y`. -Therefore, we have: - -.. math:: - - \nabla_C(t) &= \frac{\partial C}{\partial r} + i \frac{\partial C}{\partial s} \\ - &= \frac{\partial C}{\partial x} \frac{\partial x}{\partial r} + - i \frac{\partial C}{\partial x} \frac{\partial x}{\partial s} \\ - &= \Re \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) - -The imaginary part of :math:`\nabla_C(z)` is ignored, because -:math:`\Im z` is constrained to be 0. - - -Analytic functions ------------------- - -If :math:`z` is the output of an analytic function of :math:`t`, some -simplifications are possible. Analytic functions include, for instance, -polynomial functions, the exponential function. Most complex functions, -however, are not: absolute value, real part, imaginary part, complex -conjugate, etc. - -Analytic (or holomorphic) functions satisfy the Cauchy-Riemann equations: - -.. math:: - - \frac{\partial \Re z}{\partial \Re t} = \frac{\partial \Im z}{\partial \Im t} \text{ and } \frac{\partial \Re z}{\partial \Im t} = - \frac{\partial \Im z}{\partial \Re t} - -Or, in our case: - -.. math:: - - \frac{\partial x}{\partial r} = \frac{\partial y}{\partial t} \text{ and } \frac{\partial x}{\partial s} = - \frac{\partial y}{\partial r} - -This leads to: - -.. 
math:: - - \nabla_C(t) &= \Re \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) + - \Im \left(\nabla_C(z)\right) \left(\frac{\partial y}{\partial r} + i \frac{\partial y}{\partial s}\right) \\ - &= \Re \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) + - \Im \left(\nabla_C(z)\right) \left(- \frac{\partial x}{\partial s} + i \frac{\partial x}{\partial r}\right) \\ - &= \Re \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) + - i \Im \left(\nabla_C(z)\right) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) \\ - \nabla_C(t) &= \nabla_C(z) \left(\frac{\partial x}{\partial r} + i \frac{\partial x}{\partial s}\right) - = - i \nabla_C(z) \left(\frac{\partial y}{\partial r} + i \frac{\partial y}{\partial s}\right) - - -Finite differences -================== - -In order to verify that the mathematical formula for a gradient, or its -implementation, is correct, we usually use a finite-differenciation -approach. If :math:`C` is our real scalar cost, and :math:`x` a -real-valued scalar variable, then: - -.. math:: - - \frac{\partial C}{\partial x} \approx \frac{C(x + \varepsilon) - C(x)}{\varepsilon} - -where :math:`\varepsilon` is also a real scalar, of small magnitude -(typically :math:`10^{-6}` to :math:`10^{-4}`). If :math:`x` is a -tensor, then this approximation has to be made for each element -:math:`x_i` independently (a different :math:`\varepsilon_i` could be used -each time, but usually they are all equal to :math:`\varepsilon`). - -For a complex scalar variable :math:`z = x + iy`: - -.. math:: - - \nabla_C(z) &= \frac{\partial C}{\partial x} + i \frac{\partial C}{\partial y}\\ - \nabla_C(z) &\approx \frac{C(z + \delta) - C(z)}{\delta} + i \frac{C(z + i \varepsilon) - C(z)}{\varepsilon} - -Both partial derivative have to be estimated independently, using -generally :math:`\delta = \varepsilon`. diff --git a/doc/proposals/conditional.txt b/doc/proposals/conditional.txt deleted file mode 100644 index e58d4c1da7..0000000000 --- a/doc/proposals/conditional.txt +++ /dev/null @@ -1,157 +0,0 @@ - -============================================================================= -Proposal for New Linking Strategy supporting Lazy Evaluation: Op.make_thunk -============================================================================= - -.. note:: - - Proposal made June 2010. - - -Motivation -=========== - -Conditional evaluation is useful to describe many optimization algorithms where -the update expressions depend on internal state. - -True conditional evaluation requires lazy graph evaluation. -Without lazy graph evaluation, the runtime of a graph can be exponential in the -number of conditionals instead of linear. No one waits an exponential amount of -time, so instead people work around this problem in various other ways, but it -would be better if theano had an 'if-then-else' expression (call it cond). - -A lazily-evaluted 'cond' requires a linker to use a different method for -interacting with Ops. Neither the current perform() nor c_code() approaches -support lazy evaluation. -Why do perform (and c_code) not handle lazy evaluation? -The syntax of the current perform() could be extended to be compatible with lazy -evaluation. For example, the linker could set all inputs to None, and use the -return value from perform() to see which inputs are required. 
But all the Ops -that currently implement a perform() function would be broken because their -perform implementations do not ask for inputs before using them. I don't see a -way around this. The same restriction applies to c_code. - -The way around this is to introduce a new interface for the linker to talk to -Ops. I propose that we add an Op.make_thunk() that returns an object satisfying -this interface. - - -At the same time, it appears that as we try to integrate PyCUDA Ops another -problem arises. We would like to use Op.perform() to drive the GPU, but it is -natural to move compilation of the CUDA kernel to a point after make_node() and a -point before perform(). The point where the linker makes an thunk from the Op -seems like a natural choice. - - -A third motivation for introducing an Op.make_thunk function is to clarify the -relationship between Ops (the classes you implement in Python) and mathematical -operations (the more abstract things in terms of which you think when using -Theano). -I propose that *technically* an Op, when conditioned by particular inputs, -generates *at most one implementation* that defines the behaviour of that Op. -In *intuitive terms*, the abstract mathematical steps that we sometimes talk about regarding Theano -still correspond to Ops -- it's just that these Ops have relatively generic -implementations. -The process of optimization is to specialize those generic implementations -by using information from the rest of the graph. -If we accept that an Op corresponds to at most one implementation, -then it makes sense to ask an Op instance to expose that implementation via a -standard interface (Op.make_thunk). -It does not make sense to pass arguments to Op.make_thunk such as 'py' or "c|py" -to tell the Op which implementation to use. The Op instance represents just one -implementation, and flags such as 'py' or 'c|py' should be passed to the Op's -constructor. - - -Proposal: Op.make_thunk -========================== - -There are two interface items I propose to add. The first is a Thunk object -(which we have never had before), and the second is a new function (make_thunk) -in the PureOp class (a superclass of Op) that will return a Thunk. - -.. code-block:: python - - class Thunk (object): - """Abstract class / interface - - It describes the interface used by a Theano linker to execute the nodes in a - graph. Thunk instances are in correspondance with Apply instances that - remain in the final form of the graph after optimization. - - """ - - lazy = property(..., - """True means the thunk may trigger lazy evaluation. - False means the thunk always requires all inputs and computes all - outputs. - Consequently False implies that __call__ always returns None - """ - - def __call__(self): - """Thunk will compute some number (or zero) of outputs and in the case - that it cannot compute all its outputs for lack of inputs, this function - will return a list of input indexes that are required. The linker will - typically compute those required inputs and then call this - __call__ function again. - The thunk is considered to be finished when it returns an empty list or - None. - """ - -.. code-block:: python - - class PureOp(object): # recall: - # Op inherits from PureOp - - def make_node(self, *inputs): # leave alone - ... - - def perform(self, node, - inputs, output_storage): # move to `Op` class - ... 
- - def make_thunk(self, node, # new function - input_computed, output_computed, - input_registers, output_registers, - ): - """ - :type node: Apply instance - :param node: previous rval from make_node(self, *inputs) - - :type input_computed: list of len-1 lists, with values in (0,1). - :param input_computed: at runtime, input_computed[i][0]==1 implies - that the i'th input has been computed and stored at - input_registers[i][0], and is available for use. - Otherwise the content of input_registers[i][0] is undefined. - - :type output_computed: list of len-1 lists, with values in (0,1). - :param output_computed: at runtime, output_computed[i][0]==1 implies - that the i'th output has already been computed and stored at - output_registers[i][0]. - Otherwise, output_registers[i][0] will contain either None, or - a value that was previously computed by this thunk. - - :type input_registers: list of len-1 lists - :type output_registers: list of len-1 lists - - :param input_registers: the i'th input can be read from - input_registers[i][0] when input_computed[i][0] == 1. - - :param output_registers: the i'th output must be stored to - output_registers[i][0], at which point the thunk must set output_computed[i][0] == 1. - - :returns: a Thunk (subclass) instance - - """ - - -The Thunk class can have subclasses that use Op.perform and Op.c_code as we use -them now. The interface of Thunk is backward-compatible with the thunks built -by the CLinker and PerformLinker. If a graph contains zero Thunks with -lazy==True, then the current Linkers will continue to work. -The new Thunk interface will support a new LazyLinker that can run programs for -which some thunks have lazy==True. - -The Thunk class can have subclasses that are implemented in C, which might help -performance. - diff --git a/doc/proposals/dp_optimization.txt b/doc/proposals/dp_optimization.txt deleted file mode 100644 index 0aa9709a9d..0000000000 --- a/doc/proposals/dp_optimization.txt +++ /dev/null @@ -1,18 +0,0 @@ - -======================== -DP Instruction Selection -======================== - -Read Ch 9 of Modern Compiler Implementation about instruction selection. -We should probably be doing graph optimization totally differently: -Optimizations *only add* new ways of implementing something, they do not replace -the old way. Every graph node (apply) as a cost, and Dynamic Programming (DP) -is used to select the minimum cost graph. - -The advantage of this approach is that optimizations do not have to run in such -a careful order, and graph selection would be much faster. - - -Think about how aliasing and destructive operations (the destroy-handler) would -fit in this approach. - diff --git a/doc/proposals/graphical_models.txt b/doc/proposals/graphical_models.txt deleted file mode 100644 index 4f5483ddf7..0000000000 --- a/doc/proposals/graphical_models.txt +++ /dev/null @@ -1,78 +0,0 @@ -================================================================== -Random Numbers, Random Variables and Compiling Graphical Models -================================================================== - -.. note: - Proposed 2010 02 06 - - -Objective -========= - -It might be nice to use Theano as a language and compiler for questions about -graphical models. - -In this way, we could express something like Logistic Regression like this: - -.. 
code-block:: python - - from theano import random_variable as RV - - X, Y, s_idx = RV.empirical(my_dataset) - - # model parameters - v = shared(numpy.zeros(())) - b = shared(numpy.zeros(())) - - Y_hat = RV.multinomial(n=1, p=softmax(dot(X,v)+b)) - - cost = sum(-log(Y_hat.density(Y))) - - train_fn = function([s_idx], cost, updates=[[v,b], grad(cost, [v,b]])) - -.. code-block:: python - - RandomVariable(Variable) - - def sample(self, n): - """[Symbolically] draw a sample of size n""" - - def density(self, pt, givens=None): - """Conditional Density/Probability of P(self=pt) - - Implicitly conditioned on knowing the values of all variables - on which this one depends. Optionally override ancestor variables - using givens. - """ - - def mode(self): - """Return expression of the most likely value of this distribution""" - -We would really like to integrate out certain variables sometimes... - - -An RBM could be expressed like this: - -.. code-block:: python - - w = shared(initial_weights) - v = shared(initial_visible_biases) - u = shared(initial_hidden_biases) - visible = RV.binomial(n=1, p=None) # p filled in by EnergyModel - hidden = RV.binomial(n=1, p=None) # p filled in by EnergyModel - - energy = dot(visible,v) + dot(hidden, u) + dot(dot(visible, w), hidden) - - RBM = EnergyModel(energy, variables={'visible':visible, 'hidden':hidden], params=[w,v,u]) - - RBM.energy(v,h) # an expression for the energy at point (v,h) - - RBM.visible.energy(h) # an expression for the free energy - RBM.hidden.energy(h) # an expression for the free energy - v_given_h = RBM.visible.conditional(h) # a random variable - -Rather than program all the training algorithms into an RBM module, -the idea would be to express the relationship between RBM variables so that we -could automatically recognize how to do Gibbs sampling, gradient descent on Free -Energy, etc. - diff --git a/doc/proposals/index.txt b/doc/proposals/index.txt deleted file mode 100644 index 1c5ce78f4f..0000000000 --- a/doc/proposals/index.txt +++ /dev/null @@ -1,14 +0,0 @@ -:orphan: - -.. _proposals: - -================================== -Proposals for new/revised features -================================== - -.. toctree:: - :maxdepth: 1 - :glob: - - * - diff --git a/doc/proposals/intermediate_language.txt b/doc/proposals/intermediate_language.txt deleted file mode 100644 index fe57ea6d10..0000000000 --- a/doc/proposals/intermediate_language.txt +++ /dev/null @@ -1,17 +0,0 @@ - -===================== -Intermediate Language -===================== - - -It would be nice to be able to use Theano from other languages. -This requires two things: a way to communicate the expression to the theano -compiler, and a way to pass data to and from the compiled function. - -One way to do this would be define a textual representation of theano graphs. -A Scheme-like language seems appropriate. Perhaps just scheme would be -appropriate. - - -How to pass shared variables? - diff --git a/doc/proposals/mongodb_cache.txt b/doc/proposals/mongodb_cache.txt deleted file mode 100644 index 90f6a6864d..0000000000 --- a/doc/proposals/mongodb_cache.txt +++ /dev/null @@ -1,11 +0,0 @@ - -================= -MongoDB DLL Cache -================= - - -In network environments (like at DIRO on NFS3), a distributed DB like mongo or couch is faster and more -robust to concurrency than the $HOME/.theano. Also, a single cache could be -shared by multiple users. This would result in less compilation time, for -everyone, and less stale-cache problems. 
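For a sense of what such a shared cache would replace, a minimal sketch of inspecting the current file-based cache location, assuming Aesara keeps Theano's ``base_compiledir``/``compiledir`` config options:

.. code-block:: python

    import aesara

    # The file-based cache of compiled C modules that a MongoDB/CouchDB
    # backend would replace lives under these directories.
    print(aesara.config.base_compiledir)  # the historical $HOME/.theano location
    print(aesara.config.compiledir)       # per-platform subdirectory actually used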
- diff --git a/doc/proposals/opt_patterns2.txt b/doc/proposals/opt_patterns2.txt deleted file mode 100644 index 6ce635bc1e..0000000000 --- a/doc/proposals/opt_patterns2.txt +++ /dev/null @@ -1,37 +0,0 @@ - -====================== -Optimization Patterns -====================== - -.. note: - Proposed 2010 01 20 - - -Motivation -========== - -Theano optimizations are organized at high level, -but canonicalization and specialization (C&S) are a mess. It is difficult to know how a graph will -be optimized, or to know in which order optimizations will be performed. -C&S is also slow because of the guess-and-check nature of node optimization within equilibrium -optimizers (VERIFY THIS BY PROFILING). -C&S functions are also very difficult and tedious to write because of -symmetries in the graph, and because of the lack of standard Op names -(e.g. ``T.add``, ``T.and_``, and ``T._shape``). Gemm and the advanced_indexing -> xent -optimization are particularly tricky examples. - -Defining a sort of regexp-like approach for describing graph substitutions would ideally be -less error-prone, less tedious, more efficient to evaluate, easier to document, and all-round -better. - - -Proposal -======== - -In a nutshell: revisit the PatternSub and make it more powerful. - -Olivier B. (original author or PatternSub) mentioned that one of the problems was the annoyance -of working through DimShuffle - -Olivier B. also suggests writing scalar-related patterns in terms of scalars, and then inferring Tensor-related patterns. - diff --git a/doc/proposals/pfunc.txt b/doc/proposals/pfunc.txt deleted file mode 100644 index 54dbc8950a..0000000000 --- a/doc/proposals/pfunc.txt +++ /dev/null @@ -1,226 +0,0 @@ - -============================================= -Proposal for pfunc Function Interface [DONE] -============================================= - -.. note:: - - This proposal was implemented some time around summer 2009, and merged into - the trunk around new years 2010. - -Following discussion on theano-dev (titled TheanoObject), the following -changes are proposed to make function-construction calls more -readable and intuitive, and to make it easier to share values between -functions. - -The strategy is to - -- introduce a new kind of ``Variable`` (``SharedVariable``) that has a container - associated with it, and can allow multiple functions to share a value. - -- introduce a friendlier version of function (tentative name ``pfunc``), - -The following code gives a very quick idea of what is being proposed: - -..code-block:: python - - a = lscalar() - b = shared(1) #NEW: create a shared variable - - f1 = pfunc([a], a+b) - f2 = pfunc([In(a, value=44)], a + b, updates={b: b + 1}) - - b.value # -> 1 - - f1(3) # -> 4 - f2(3) # -> 4 (but update b.value with += 1) - b.value # -> 2 - - f1(3) # -> 5 - - b.value = 0 - f1(3) # -> 3 - - -Declaring a Shared Variable -=========================== - -The proposal is for two new ways of creating a *shared* variable: - -.. code-block:: python - - class SharedVariable(Variable): - """ - Variable with a value that is (defaults to being) shared between functions that it appears in. - """ - - def __init__(self, name, type, value, strict): - """ - :param name: The name for this variable (see `Variable`). - - :param type: The type for this variable (see `Variable`). - - :param value: A value to associate with this variable (a new container will be created). - - :param strict: True -> assignments to .value will not be cast or copied, so they must - have the correct type. 
- - :param container: The container to use for this variable. Illegal to pass this as well - as a value. - - For more user-friendly constructor, see `shared` - - """ - ... - - - - value = property(...) - """Read/write the non-symbolic value associated with this SharedVariable. - - If the SharedVariable is shared, changes to this value will be visible to all functions using - this SharedVariable. If this SharedVariable is not shared, a change will not be visible to - functions that were created before the change. - - """ - - def shared(value, name=None, strict=False, **kwargs): - """Return a SharedVariable Variable, initialized with a copy or reference of `value`. - - This function iterates over constructor functions (see :func:`shared_constructor`) to find a - suitable SharedVariable subclass. - - :note: - By passing kwargs, you effectively limit the set of potential constructors to those that - can accept those kwargs. - - """ - ... - -The function `shared` is a factory-method intended for end-users. - -Direct construction of a ``SharedVariable`` is probably not going to be a common -pattern, it will be more common to subclass it (i.e. ``TensorSharedVariable``, -``SparseSharedVariable``, etc.) and to register a constructor so that these -subclasses will be instantiated by the `shared` factory method. - -A ``SharedVariable`` instance is meant to change over the duration of a program, -either because of the updates of a function call, or because of direct -assignment to its ``.value`` field. -At any time, the ``.value`` field can be be used to access the current value -associated with the shared value. - -Using SharedVariables as pfunc Parameters -========================================= - -A ``SharedVariable`` instance has a ``value`` property that can be used to get and -set the value associated with that shared variable in all the ``pfunc`` -functions that use it. - -.. code-block:: python - - a = tensor.lscalar() - b = shared(7) - - # create two functions that use `b` as an implicit input - f1 = pfunc([a], a + b) - f2 = pfunc([a], a * b) - - f1(5) # -> 12 - b.value = 8 # modify the shared variable's value - - f1(5) # -> 13 # the new value is reflected in any compiled functions - f2(4) # -> 32 # f2 uses the latest value in b's container - -However, SharedVariables cannot be used as inputs to theano functions. -This is because doing it may yield code that would be either ambiguous, or -prone to easy mistakes (e.g. accidentally overwriting the content of a shared -variable). - - -pfunc -===== - -The examples above give the general flavour of what pfunc is for. -Their signatures are below. -Corner cases and exotic examples can be found in the tests. - -.. code-block:: python - - def pfunc(params, outputs, mode=None, givens=None, updates=None) - """Function-constructor for graphs with shared variables. - - :type params: list of either Variable or In instances. - :param params: function parameters, these are not allowed to be shared - variables - - :type outputs: list of Variables or Out instances - :param outputs: expressions to compute - - :param mode: compilation mode - - :type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict. - :param updates: update the values for SharedVariable inputs according to these expressions - - :rtype: theano.compile.Function - :returns: a callable object that will compute the outputs (given the inputs) - and update the implicit function arguments according to the `updates`. - - """ - ... 
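A minimal sketch of the ``givens`` argument from this signature, assuming it maps an original variable to its replacement as in the implementation that was eventually merged:

.. code-block:: python

    from pfunc import pfunc
    from sharedvalue import shared
    from theano import tensor

    a = tensor.lscalar()
    b = shared(1)
    c = shared(10)

    f = pfunc([a], a + b)                 # reads the value in b's container
    g = pfunc([a], a + b, givens={b: c})  # same expression, with b swapped for c

    f(2)  # -> 3
    g(2)  # -> 12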
- - -Note that if some update value is not a variable, it will be cast into -a ``SharedVariable`` using the ``shared`` function. This ensures it is -properly taken into account to build the Theano function underlying the -``pfunc``. A consequence of this is that if this update value is mutable -(e.g. a Numpy array), it may be modified after the function is created. - - -NNet Example -============ - -Of course there are lots of ways to write the following code, but this is one -simple one. - -.. code-block:: python - - - import numpy, theano - - from pfunc import pfunc - from sharedvalue import shared - from theano import tensor as tt - from theano.gradient import grad - from theano.tensor.nnet import sigmoid - - class NNet(object): - - def __init__(self, - input = tt.dvector('input'), - target = tt.dvector('target'), - n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw): - super(NNet, self).__init__(**kw) - - self.input = input - self.target = target - self.lr = shared(lr, 'learning_rate') - self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1') - self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2') - - self.hidden = sigmoid(tt.dot(self.w1, self.input)) - self.output = tt.dot(self.w2, self.hidden) - self.cost = tt.sum((self.output - self.target)**2) - - self.sgd_updates = { - self.w1: self.w1 - self.lr * grad(self.cost, self.w1), - self.w2: self.w2 - self.lr * grad(self.cost, self.w2)} - - self.sgd_step = pfunc( - params = [self.input, self.target], - outputs = [self.output, self.cost], - updates = self.sgd_updates) - - self.compute_output = pfunc([self.input], self.output) - - self.output_from_hidden = pfunc([self.hidden], self.output) diff --git a/doc/proposals/premerge.txt b/doc/proposals/premerge.txt deleted file mode 100644 index 28fd23e95d..0000000000 --- a/doc/proposals/premerge.txt +++ /dev/null @@ -1,12 +0,0 @@ - -Proactive Merging -================= - - -Merge is done now as an optimization. -But if Merging was done at graph construction time, things like #476 would work. -Additionally, memo-izing at graph construction time would make it possible to -define recursive formula with recursive python functions (e.g. Fibonacci). -Currently the merge optimization would make the Fibonacci series linear, but the -size of the program used to express the program would be exponential. - diff --git a/doc/proposals/tensor_attributes.txt b/doc/proposals/tensor_attributes.txt deleted file mode 100644 index 40fac0850f..0000000000 --- a/doc/proposals/tensor_attributes.txt +++ /dev/null @@ -1,13 +0,0 @@ - - -======================== -Add tensor attributes -======================== - - -Size, shape, psd, symmetric, triangular, contiguous. -Add these attributes to the TensorType with the option always that they be -'unknown'. -Add attributes that are useful for optimizations, or useful for code generation. - - diff --git a/doc/requirements.inc b/doc/requirements.inc index bde9fb60a5..b982a95140 100644 --- a/doc/requirements.inc +++ b/doc/requirements.inc @@ -33,7 +33,7 @@ Requirements **Optional requirements** |PlatformCompiler| - **Highly recommended.** Theano can fall back on a NumPy-based Python execution model, but a C compiler allows for vastly faster execution. + **Highly recommended.** Aesara can fall back on a NumPy-based Python execution model, but a C compiler allows for vastly faster execution. `Sphinx `_ >= 0.5.1, `pygments `_ For building the documentation. LaTeX_ and dvipng_ are also necessary for math to show up as images. 
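A minimal sketch of checking which execution model the compiler fallback above will use, assuming the config option keeps Theano's name ``cxx``:

.. code-block:: python

    import aesara

    # config.cxx holds the detected C++ compiler; an empty string means
    # Aesara will use the slower NumPy-based Python implementations.
    if aesara.config.cxx:
        print("Using C++ compiler:", aesara.config.cxx)
    else:
        print("No C++ compiler found; falling back to Python execution")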
@@ -56,9 +56,9 @@ Requirements git+https://github.com/lebedov/scikit-cuda.git#egg=scikit-cuda``. `warp-ctc`_ - Required for :ref:`Theano CTC implementation + Required for :ref:`Aesara CTC implementation `. It is faster then using an - equivalent graph of Theano ops. + equivalent graph of Aesara ops. Requirements installation through Conda (recommended) diff --git a/doc/sandbox/debugging_with_stepmode.txt b/doc/sandbox/debugging_with_stepmode.txt index 8212f9480d..dd8a38fc17 100644 --- a/doc/sandbox/debugging_with_stepmode.txt +++ b/doc/sandbox/debugging_with_stepmode.txt @@ -4,16 +4,16 @@ Debugging with a customized so-called StepMode ============================================== -One convenient trick I've found for debugging my programs that are running with theano is to +One convenient trick I've found for debugging my programs that are running with aesara is to use what I call a 'StepMode'. There is no such StepMode in the standard library because the purpose of it is to hack it to investigate what your own particular program is doing. .. code-block:: python - from theano.link import WrapLinkerMany - from theano.configdefaults import config - from theano.compile.mode import (Mode, register_mode, predefined_modes, predefined_linkers, + from aesara.link import WrapLinkerMany + from aesara.configdefaults import config + from aesara.compile.mode import (Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers) class StepMode(Mode): @@ -73,5 +73,3 @@ powerful way to understand your program's execution. Remember, if you give names your variables then printing nodes will give you a better idea of where in the calculations you are. - - diff --git a/doc/sandbox/functional.txt b/doc/sandbox/functional.txt index 706dd67cd1..f34fdd71db 100644 --- a/doc/sandbox/functional.txt +++ b/doc/sandbox/functional.txt @@ -3,5 +3,5 @@ Functional ========== -Want to know about Theano's `function design +Want to know about Aesara's `function design `? diff --git a/doc/sandbox/hosting.txt b/doc/sandbox/hosting.txt deleted file mode 100644 index 91b32a9bec..0000000000 --- a/doc/sandbox/hosting.txt +++ /dev/null @@ -1,64 +0,0 @@ -============== -Hosting Theano -============== - -'''Historical Interest. This has been addressed for now. 20080904''' - -There are several [http://en.wikipedia.org/wiki/Comparison_of_free_software_hosting_facilities project hosting services] online, but none is perfect for theano. - -Wishlist: - - version control (mercurial) - - bugtracker (TRAC, ideally) - - wiki - - release file hosting - - mailing list - - reliability of hosting service - -Should we get a domain name? To my dismay, theano.org, theano.com and theano.net are all taken. The first two seem legit, but theano.net doesn't look like it has anything on it and expires on May 29, so maybe there's a chance we can snag it? -ob - -We could also get [http://www.theano.io]. -jpt - --------- - -On Fri, May 09, 2008 at 03:49:31PM -0400, Joseph Turian wrote: -> Another option for backup: -> -> Since we have access to LGCM, there is a single SQLite db file (AFAIK) -> that we can back up periodically. -> e.g. cron job to gzip and email it to us once a week. - -There are instructions for how to backup a Trac site, i just haven't gotten -around to it. Currently, the whole directory is rsynced to the lisa account, -which is close to ok, but not quite. - -> Besides mailing list, is there anything else we need? Besides figuring -> out how to administer trac? 
:} - -Writing scripts to update p-omega1/.ssh/authorized_keys2 automatically from -certain user accounts' authorized_keys2 file. I've written this script, but not -really tested it. - -Hooking up mercurial to trac would be nice, so we can associate commits and -tickets. - -lgcm's uptime is usually about a week or two at max, so there's the pain in the -ass of having to re-log in, start up a screen session, find the directories, -restart trac, restart hg serve. We should be restarting hg serve for tlearn too -soon. - -Even if I do set up the authorized_keys2 script to do the right thing, the users -on TRAC and the users on the system are totally independent, so adding a new -user is non-standard and only I can do it right now. - -My choices seem to be: - - document all these hoops and good ideas - - fix them so they are easier to use and document - - replace them with hosting service - -All of these options take time, mental effort, and the support of our -development group (look the large number of messages today on the topic)... so -i'm trying to find the least of all evils. The Right Thing doesn't seem to have -appeared yet. - - - diff --git a/doc/sandbox/how_to_make_ops.txt b/doc/sandbox/how_to_make_ops.txt index 8778d12834..3116eca0c0 100644 --- a/doc/sandbox/how_to_make_ops.txt +++ b/doc/sandbox/how_to_make_ops.txt @@ -12,7 +12,7 @@ An Op class can represent one or a wide variety of functions depending on how yo It is not always obvious what should be a parameter and what should be an input. For example, a generic indexing Op could take a list and an index as graph inputs, whereas a specific indexing Op could have an index parameter, so you could have a specialized Op instance to fetch the nth element of a list, where n is known statically. [*Could you give some advice about the relative tradeoffs of having something as a parameter and something as an input?*] -Examples of parameterized Ops in theano: +Examples of parameterized Ops in aesara: ``Broadcast(, )`` upgrades an op that works on scalars so it works on tensors. Can work inplace or not. ``Reduce(, )`` @@ -46,7 +46,7 @@ The ``make_node`` method is expected to have the following signature: make_node(self, *inputs) -``inputs`` may be a list of anything that the user wants to provide as symbolic input (symbolic: standing for the actual values that will be passed when the graph is compiled into an executable function). [*The Theano intro should describe symbolic in greater depth, and we should link to that from here.*] This may or may not include Variable instances (but if you want the inputs of this Op to sometimes be outputs of another Op, then the inputs should be Variable instances). [*What else could they be? Constant, Values, ...*] The return value should be an instance of [GraphStructures Apply] (see the example below). Here are the tasks typically handled in ``make_node``. +``inputs`` may be a list of anything that the user wants to provide as symbolic input (symbolic: standing for the actual values that will be passed when the graph is compiled into an executable function). [*The Aesara intro should describe symbolic in greater depth, and we should link to that from here.*] This may or may not include Variable instances (but if you want the inputs of this Op to sometimes be outputs of another Op, then the inputs should be Variable instances). [*What else could they be? Constant, Values, ...*] The return value should be an instance of [GraphStructures Apply] (see the example below). 
Here are the tasks typically handled in ``make_node``. * Check that the inputs are valid (type checking, etc.). [*Since we don't actually have values, what can we do besides type checking?*] * If needed, wrap the inputs in Variable instances with the proper type. @@ -59,12 +59,12 @@ Example: .. code-block:: python - from theano.scalar import * - + from aesara.scalar import * + class Add(Op): #... def make_node(self, x, y): - # note 1: constant, int64 and Scalar are defined in theano.scalar + # note 1: constant, int64 and Scalar are defined in aesara.scalar # note 2: constant(x) is equivalent to Constant(type = int64, data = x) # note 3: the call int64() is equivalent to Variable(type = int64) or Variable(type = Scalar(dtype = 'int64')) if isinstance(x, int): @@ -80,7 +80,7 @@ Example: node = Apply(op = self, inputs = inputs, outputs = outputs) return node #... - + add = Add() # I make an instance of Add node1 = add.make_node(int64(), int64()) # I make a node with two Variable inputs node2 = add.make_node(1, 2) # this works too @@ -113,7 +113,7 @@ It is not necessary or recommended to override ``__call__`` unless you want to h perform ======= -The ``perform`` method is expected to have the following signature: +The ``perform`` method is expected to have the following signature: `` perform(self, node, inputs, output_storage) @@ -140,7 +140,7 @@ Here is an example of a properly defined ``perform``: z, = output_storage # extract the one storage (the comma after z is not optional) z[0] = x + y # we must put the variable in z[0] ... - + add = Add() # I make an instance of Add node = add.make_node(int64(), int64()) # I make a node with two integer inputs storage = [None] # I make my storage as a 1-element list with None @@ -167,7 +167,7 @@ Advanced note: for an Op with multiple outputs, it is possible that some of them grad ==== -``grad`` is a theano-specific [*as opposed to?*] function - it does not interface with core optimization and compilation facilities, but it provides a useful interface to differentiation. Its expected signature is: +``grad`` is an Aesara-specific [*as opposed to?*] function - it does not interface with core optimization and compilation facilities, but it provides a useful interface to differentiation. Its expected signature is: .. code-block:: python @@ -190,9 +190,9 @@ Essentially, the semantics are: def grad(self, (x, y), (gz, gw)): # In this situation you want two return values that have the shape of x and y respectively return gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy - -More specifically, -``grad`` must return a list or tuple of input gradients, as many as there are inputs. Let C be a Variable (currently assumed to be a scalar) that depends through a theano symbolic expression on the node outputs. Then each output_gradients[i] represents symbolically dC/doutputs[i]. The returned input gradients should represent symbolically dC/dinputs[i]. + +More specifically, +``grad`` must return a list or tuple of input gradients, as many as there are inputs. Let C be a Variable (currently assumed to be a scalar) that depends through an Aesara symbolic expression on the node outputs. Then each output_gradients[i] represents symbolically dC/doutputs[i]. The returned input gradients should represent symbolically dC/dinputs[i]. Example: @@ -207,7 +207,7 @@ Example: ... 
mul = Mul() -If the op is not differentiable wrt one of its inputs, the gradient for that input should be ``None``; if the op is not differentiable with respect to any of its inputs, it should return something equivalent to +If the op is not differentiable wrt one of its inputs, the gradient for that input should be ``None``; if the op is not differentiable with respect to any of its inputs, it should return something equivalent to ``[None] * len(inputs)``. If ``grad`` is not implemented for any op in a graph, then the symbolic gradient engine will complain (with an attribute exception). @@ -254,7 +254,7 @@ Example: if we expect to call the op repeatedly on incrementally bigger inputs, default_output = 0 def make_node(self, x, y): return Apply(self, [x,y], [x.type.make_variable(), x.type.make_variable()]) - + def perform(self, node, (x, y), (z, stor)): if z[0] is None or stor[0] is None: stor[0] = numpy.ndarray(x.size * 2) @@ -273,6 +273,3 @@ It is also possible but potentially more complicated to use "private inputs" to do the same thing: inputs cannot be set, though their contents can be modified, so a wrapper would be needed and the input must be marked as 'destroyed' by the Op using the 'destroy_map' field. - - - diff --git a/doc/sandbox/logistic_regression_example.txt b/doc/sandbox/logistic_regression_example.txt index 389bc9db2b..f0584d0370 100644 --- a/doc/sandbox/logistic_regression_example.txt +++ b/doc/sandbox/logistic_regression_example.txt @@ -36,7 +36,7 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY? # # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS - update_fn = theano.function( + update_fn = aesara.function( inputs = [x, y, stepsize, In(w, name='w', @@ -54,7 +54,7 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY? outputs = cost, mode = 'EXPENSIVE_OPTIMIZATIONS') - apply_fn = theano.function( + apply_fn = aesara.function( inputs = [x, In(w, value=update_fn.storage[w]), In(b, value=update_fn.storage[b])], outputs = [prediction]) diff --git a/doc/sandbox/max_gotcha.txt b/doc/sandbox/max_gotcha.txt deleted file mode 100644 index 2af5c34169..0000000000 --- a/doc/sandbox/max_gotcha.txt +++ /dev/null @@ -1,48 +0,0 @@ -.. _sandbox_maxgotcha: - -========== -Max Gotcha -========== - -Guillaume and I just found a bug in some experiment code that was -basically caused by confusing semantics of max(). The same sort of -thing applies to min. This is an FYI email to help others on the list -avoid this mistake, which is (I think) easy to make. - -Python's max() function takes multiple arguments and returns the -largest one of them. (I won't go into the details of how it deals with -corner cases.) - -IN CONTRAST - -numpy's max() function takes multiple arguments and returns the -largest element[s] from the *first* argument. The second argument is -used to identify the axis along which to evaluate the [python-style] -max. The third argument is an array into which the result can be -written. - -So for example: - -.. doctest:: - :options: +SKIP - - >>> import numpy - >>> max(3, 4) - 4 - >>> numpy.max(3, 4) # This is an error - 3 - >>> a, b, c = [numpy.asarray(i) for i in [0, 1, 2]] - >>> numpy.max(a, b, c) # This is an error - 0 - >>> c - array(0) - -Be careful! - -Theano defines a max function (called theano.tensor.max) that is -similar numpy's max. - -Theano also defines a function called theano.tensor.largest that is -closer to python's, but not identical since it works elemwise for -tensors. 
There is a corresponding 'smallest' function that is like -min() diff --git a/doc/sandbox/performance.txt b/doc/sandbox/performance.txt index 9f147b7b96..1d44126ebc 100644 --- a/doc/sandbox/performance.txt +++ b/doc/sandbox/performance.txt @@ -3,21 +3,21 @@ Performance =========== -Theano uses several tricks to obtain good performance: +Aesara uses several tricks to obtain good performance: * common sub-expression elimination * [custom generated] C code for many operations * pre-allocation of temporary storage * loop fusion (which gcc normally can't do) On my neural net experiments for my course projects, I was getting around 10x -speed improvements over basic numpy by using theano. +speed improvements over basic numpy by using aesara. [More specific speed tests would be nice.] -With a little work, Theano could also implement more sophisticated +With a little work, Aesara could also implement more sophisticated optimizations: - * automatic ordering of matrix multiplications + * automatic ordering of matrix multiplications * profile-based memory layout decisions (e.g. row-major vs. col-major) * gcc intrinsics to use MMX, SSE2 parallelism for faster element-wise arithmetic * conditional expressions diff --git a/doc/sandbox/randomnumbers.txt b/doc/sandbox/randomnumbers.txt index 964e422114..2f12025c00 100644 --- a/doc/sandbox/randomnumbers.txt +++ b/doc/sandbox/randomnumbers.txt @@ -11,7 +11,7 @@ Random Numbers == Requirements == -Theano functions sometimes need random numbers. +Aesara functions sometimes need random numbers. Random operations are not as simple as other operations such as ones_like, or pow(), because the output must be different when we call the same function repeatedly. CompileFunction's new default-valued, updatable input variables make this possible. At the same time we need random streams to be repeatable, and easy to work with. So the basic requirements of our random number mechanism are: 1. Internal random number generators must be used in a clear manner, and be accessible to the caller after a function has been compiled. @@ -25,7 +25,7 @@ Random operations are not as simple as other operations such as ones_like, or po One option would be to skirt the issue by requiring users to pass all the random numbers we might need as input. However, it is not always simple to know how many random numbers will be required because the shape of a random matrix might be computed within the graph. -The solution proposed here is to pass one or more random number generators as input to {{{theano.function}}}. +The solution proposed here is to pass one or more random number generators as input to {{{aesara.function}}}. Sharing a random number generator between different {{{RandomOp}}} instances makes it difficult to producing the same stream regardless of other ops in graph, and to keep {{{RandomOps}}} isolated. Therefore, each {{{RandomOp}}} instance in a graph will have its very own random number generator. @@ -41,7 +41,7 @@ So the proposal is to provide the missing functionality (the last three requirem #!python # create a random generator, providing a default seed to condition how RandomOp instances are produced. - from theano.compile.function import function + from aesara.compile.function import function r = MetaRandom(metaseed=872364) @@ -99,7 +99,7 @@ So the proposal is to provide the missing functionality (the last three requirem == {{{MetaRandom}}} == The {{{MetaRandom}}} class is the proposed interface for getting {{{RandomOp}}} instances. 
-There are some syntactic similarities in the way {{{MetaRandom}}} is used to construct graphs, and the way {{{numpy.RandomState}}} appears in a corresponding procedural implementation. But since theano is symbolic the meaning of {{{MetaRandom}}} is quite different. +There are some syntactic similarities in the way {{{MetaRandom}}} is used to construct graphs, and the way {{{numpy.RandomState}}} appears in a corresponding procedural implementation. But since aesara is symbolic the meaning of {{{MetaRandom}}} is quite different. As with {{{numpy.RandomState}}} though, a global instance of {{{MetaRandom}}} will be instantiated at import time for the scripter's convenience. @@ -186,7 +186,7 @@ then any time afterward both {{{r.setstate(fn, state_99)}}} and {{{r.seed(fn, 99 #!python # create a random state - from theano.compile.function import function + from aesara.compile.function import function r = RandomState(name = 'r') diff --git a/doc/sandbox/sandbox.txt b/doc/sandbox/sandbox.txt index 986ef0ddc7..4ecbdb657a 100644 --- a/doc/sandbox/sandbox.txt +++ b/doc/sandbox/sandbox.txt @@ -35,13 +35,15 @@ Gradients for a particular variable can be one of four kinds: You will get an exception of the following form. -theano.graph.utils.MethodNotDefined: ('grad', , 'LogFactorial') +.. code-block:: python + + aesara.graph.utils.MethodNotDefined: ('grad', , 'LogFactorial') 2) a symbolic variable 3) None / zero 4) undefined mathematically -currently, there is no way for a grad() method to distinguish between cases 3 +currently, there is no way for a ``grad()`` method to distinguish between cases 3 and 4 but the distinction is important because graphs with type-3 gradients are ok to run, whereas graphs with type-4 gradients are not. @@ -72,7 +74,7 @@ should be encouraged where possible.** how do we write scalar ops and upgrade them to tensor ops? ============================================================ -**Olivier says that** :class:`~theano.tensor.xlogx.XlogX` **gives a good example. In fact, I would +**Olivier says that** :class:`~aesara.tensor.xlogx.XlogX` **gives a good example. In fact, I would like to beef xlogx up into our running example for demonstrating how to write an Op:** @@ -103,7 +105,7 @@ write an Op:** : %(x)s * log(%(x)s);""" % locals() raise NotImplementedError('only floatingpoint is implemented') scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx') - xlogx = theano.tensor.elemwise.Elemwise(scalar_xlogx, name='xlogx') + xlogx = aesara.tensor.elemwise.Elemwise(scalar_xlogx, name='xlogx') **It is also necessary to talk about UnaryScalarOp vs. BinaryOp.** @@ -122,7 +124,7 @@ Mammouth **This is internal documentation. Guillaume can you make sure to hit these points:** -export THEANO_BLAS_LDFLAGS='-lmkl -liomp5 -fopenmp' +export AESARA_BLAS_LDFLAGS='-lmkl -liomp5 -fopenmp' **Do we want the following:** @@ -147,7 +149,7 @@ More simple numpy stuff ``x.reshape(x.size)`` You can also use ``resize`` but there is not reason to ''resize'' * How do you convert the type of a numpy array? - ``theano._asarray(x, dtype = 'int32')`` + ``aesara._asarray(x, dtype = 'int32')`` Note that using ``numpy.asarray`` is potentially dangerous, due to a problem in numpy where the type may not be properly set (see numpy's Track ticket #870). 
@@ -157,5 +159,5 @@ More simple numpy stuff How to reuse (overwrite) a storage tensor ========================================= -``theano.compile.io.Out(gw1, borrow = True)`` for that value in -``theano.compile.function.function`` +``aesara.compile.io.Out(gw1, borrow = True)`` for that value in +``aesara.compile.function.function`` diff --git a/doc/sandbox/software.txt b/doc/sandbox/software.txt index 911d1ea8ec..2929872e4c 100644 --- a/doc/sandbox/software.txt +++ b/doc/sandbox/software.txt @@ -14,7 +14,6 @@ Other software to look at and maybe recommend to users: * [http://www.logilab.org/857 pylint] - Syntax checker for python to help beautify your code. (We'd be hypocrites to recommend this :) * [http://www.winpdb.org/ Winpdb] - A Platform Independent Python - Debugger. (Except it doesn't really help you debug Theano graphs) + Debugger. (Except it doesn't really help you debug Aesara graphs) * [http://wiki.python.org/moin/IntegratedDevelopmentEnvironments Python Integrated Development Environments] - for all your coding needs - diff --git a/doc/sandbox/sparse.txt b/doc/sandbox/sparse.txt index 1d079802c8..9d9c64417c 100644 --- a/doc/sandbox/sparse.txt +++ b/doc/sandbox/sparse.txt @@ -114,7 +114,7 @@ Misc ---- The sparse equivalent of dmatrix is csc_matrix and csr_matrix. -:class:`~theano.sparse.basic.Dot` vs. :class:`~theano.sparse.basic.StructuredDot` +:class:`~aesara.sparse.basic.Dot` vs. :class:`~aesara.sparse.basic.StructuredDot` --------------------------------------------------------------------------------- Often when you use a sparse matrix it is because there is a meaning to the diff --git a/doc/scripts/docgen.py b/doc/scripts/docgen.py index 344955c721..a8a6cb9b5c 100644 --- a/doc/scripts/docgen.py +++ b/doc/scripts/docgen.py @@ -49,14 +49,14 @@ def mkdir(path): mkdir(outdir) os.chdir(outdir) - # Make sure the appropriate 'theano' directory is in the PYTHONPATH + # Make sure the appropriate 'aesara' directory is in the PYTHONPATH pythonpath = os.environ.get('PYTHONPATH', '') pythonpath = os.pathsep.join([throot, pythonpath]) sys.path[0:0] = [throot] # We must not use os.environ. # Make sure we don't use gpu to compile documentation - env_th_flags = os.environ.get('THEANO_FLAGS', '') - os.environ['THEANO_FLAGS'] = 'device=cpu,force_device=True' + env_th_flags = os.environ.get('AESARA_FLAGS', '') + os.environ['AESARA_FLAGS'] = 'device=cpu,force_device=True' def call_sphinx(builder, workdir): import sphinx @@ -95,7 +95,7 @@ def call_sphinx(builder, workdir): os.chdir(workdir) os.system('make') try: - shutil.copy(os.path.join(workdir, 'theano.pdf'), outdir) + shutil.copy(os.path.join(workdir, 'aesara.pdf'), outdir) os.chdir(outdir) shutil.rmtree(workdir) except OSError as e: @@ -111,5 +111,5 @@ def call_sphinx(builder, workdir): # To go back to the original current directory. 
os.chdir(currentdir) - # Reset THEANO_FLAGS - os.environ['THEANO_FLAGS'] = env_th_flags + # Reset AESARA_FLAGS + os.environ['AESARA_FLAGS'] = env_th_flags diff --git a/doc/theano-full.bib b/doc/theano-full.bib deleted file mode 100644 index fc4dba96a0..0000000000 --- a/doc/theano-full.bib +++ /dev/null @@ -1,124 +0,0 @@ -@ARTICLE{2016arXiv160502688full, - author = { - Rami Al-Rfou and - Guillaume Alain and - Amjad Almahairi and - Christof Angermueller and - Dzmitry Bahdanau and - Nicolas Ballas and - Fr\'ed\'eric Bastien and - Justin Bayer and - Anatoly Belikov and - Alexander Belopolsky and - Yoshua Bengio and - Arnaud Bergeron and - James Bergstra and - Valentin Bisson and - Josh {Bleecher Snyder} and - Nicolas Bouchard and - Nicolas Boulanger-Lewandowski and - Xavier Bouthillier and - Alexandre de Br\'ebisson and - Olivier Breuleux and - Pierre-Luc Carrier and - Kyunghyun Cho and - Jan Chorowski and - Paul Christiano and - Tim Cooijmans and - Marc-Alexandre C\^ot\'e and - Myriam C\^ot\'e and - Aaron Courville and - Yann N. Dauphin and - Olivier Delalleau and - Julien Demouth and - Guillaume Desjardins and - Sander Dieleman and - Laurent Dinh and - M\'elanie Ducoffe and - Vincent Dumoulin and - Samira {Ebrahimi Kahou} and - Dumitru Erhan and - Ziye Fan and - Orhan Firat and - Mathieu Germain and - Xavier Glorot and - Ian Goodfellow and - Matt Graham and - Caglar Gulcehre and - Philippe Hamel and - Iban Harlouchet and - Jean-Philippe Heng and - Bal\'azs Hidasi and - Sina Honari and - Arjun Jain and - S\'ebastien Jean and - Kai Jia and - Mikhail Korobov and - Vivek Kulkarni and - Alex Lamb and - Pascal Lamblin and - Eric Larsen and - C\'esar Laurent and - Sean Lee and - Simon Lefrancois and - Simon Lemieux and - Nicholas L\'eonard and - Zhouhan Lin and - Jesse A. Livezey and - Cory Lorenz and - Jeremiah Lowin and - Qianli Ma and - Pierre-Antoine Manzagol and - Olivier Mastropietro and - Robert T. McGibbon and - Roland Memisevic and - Bart van Merri\"enboer and - Vincent Michalski and - Mehdi Mirza and - Alberto Orlandi and - Christopher Pal and - Razvan Pascanu and - Mohammad Pezeshki and - Colin Raffel and - Daniel Renshaw and - Matthew Rocklin and - Adriana Romero and - Markus Roth and - Peter Sadowski and - John Salvatier and - Fran\c{c}ois Savard and - Jan Schl\"uter and - John Schulman and - Gabriel Schwartz and - Iulian Vlad Serban and - Dmitriy Serdyuk and - Samira Shabanian and - \'Etienne Simon and - Sigurd Spieckermann and - S. Ramana Subramanyam and - Jakub Sygnowski and - J\'er\'emie Tanguay and - Gijs van Tulder and - Joseph Turian and - Sebastian Urban and - Pascal Vincent and - Francesco Visin and - Harm de Vries and - David Warde-Farley and - Dustin J. 
Webb and - Matthew Willson and - Kelvin Xu and - Lijun Xue and - Li Yao and - Saizheng Zhang and - Ying Zhang}, - collaboration = {Theano Development Team}, - title = "{Theano: A {Python} framework for fast computation of mathematical expressions}", - journal = {arXiv e-prints}, - volume = {abs/1605.02688}, - primaryClass = "cs.SC", - keywords = {Computer Science - Symbolic Computation, Computer Science - Learning, Computer Science - Mathematical Software}, - year = 2016, - month = may, - url = {http://arxiv.org/abs/1605.02688}, -} diff --git a/doc/theano-short.bib b/doc/theano-short.bib deleted file mode 100644 index a251b5aa68..0000000000 --- a/doc/theano-short.bib +++ /dev/null @@ -1,11 +0,0 @@ -@ARTICLE{2016arXiv160502688short, - author = {{Theano Development Team}}, - title = "{Theano: A {Python} framework for fast computation of mathematical expressions}", - journal = {arXiv e-prints}, - volume = {abs/1605.02688}, - primaryClass = "cs.SC", - keywords = {Computer Science - Symbolic Computation, Computer Science - Learning, Computer Science - Mathematical Software}, - year = 2016, - month = may, - url = {http://arxiv.org/abs/1605.02688}, -} diff --git a/doc/troubleshooting.txt b/doc/troubleshooting.txt index fd4c91e3c5..c74a9374fe 100644 --- a/doc/troubleshooting.txt +++ b/doc/troubleshooting.txt @@ -10,12 +10,12 @@ Here are Linux troubleshooting instructions. There is a specific `MacOS`_ sectio - :ref:`TensorVariable_TypeError` - :ref:`out_of_memory` - :ref:`float64_output` -- :ref:`test_theano` +- :ref:`test_aesara` - :ref:`test_BLAS` .. _network_error_proxy: -Why do I get a network error when I install Theano +Why do I get a network error when I install Aesara ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you are behind a proxy, you must do some extra configuration steps @@ -32,13 +32,13 @@ How to solve TypeError: object of type 'TensorVariable' has no len() ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you receive the following error, it is because the Python function *__len__* cannot -be implemented on Theano variables: +be implemented on Aesara variables: .. code-block:: python TypeError: object of type 'TensorVariable' has no len() -Python requires that *__len__* returns an integer, yet it cannot be done as Theano's variables are symbolic. However, `var.shape[0]` can be used as a workaround. +Python requires that *__len__* returns an integer, yet it cannot be done as Aesara's variables are symbolic. However, `var.shape[0]` can be used as a workaround. This error message cannot be made more explicit because the relevant aspects of Python's internals cannot be modified. @@ -48,7 +48,7 @@ internals cannot be modified. How to solve Out of memory Error ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Occasionally Theano may fail to allocate memory when there appears to be more +Occasionally Aesara may fail to allocate memory when there appears to be more than enough reporting: Error allocating X bytes of device memory (out of memory). Driver report Y @@ -66,7 +66,7 @@ fragmentation which can makes it more difficult to find contiguous regions of memory of sufficient size during subsequent memory allocations. A known example is related to writing data to shared variables. When updating a -shared variable Theano will allocate new space if the size of the data does not +shared variable Aesara will allocate new space if the size of the data does not match the size of the space already assigned to the variable. 
This can lead to memory fragmentation which means that a continugous block of memory of sufficient capacity may not be available even if the free memory overall is @@ -74,7 +74,7 @@ large enough. .. _float64_output: -theano.function returns a float64 when the inputs are float32 and int{32, 64} +aesara.function returns a float64 when the inputs are float32 and int{32, 64} ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It should be noted that using float32 and int{32, 64} together @@ -84,15 +84,15 @@ Since the GPU can't compute this kind of output, it would be preferable not to use those dtypes together. To help you find where float64 are created, see the -:attr:`warn_float64` Theano flag. +:attr:`warn_float64` Aesara flag. -.. _test_theano: +.. _test_aesara: -How to test that Theano works properly +How to test that Aesara works properly ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -An easy way to check something that could be wrong is by making sure ``THEANO_FLAGS`` -have the desired values as well as the ``~/.theanorc`` +An easy way to check something that could be wrong is by making sure ``AESARA_FLAGS`` +have the desired values as well as the ``~/.aesararc`` Also, check the following outputs : @@ -102,28 +102,28 @@ Also, check the following outputs : .. code-block:: python - import theano - theano.__file__ - theano.__version__ + import aesara + aesara.__file__ + aesara.__version__ -Once you have installed Theano, you should run the test suite in the ``tests`` directory. +Once you have installed Aesara, you should run the test suite in the ``tests`` directory. .. code-block:: bash python -c "import numpy; numpy.test()" python -c "import scipy; scipy.test()" pip install pytest - THEANO_FLAGS='' pytest tests/ + AESARA_FLAGS='' pytest tests/ -All Theano tests should pass (skipped tests and known failures are normal). If +All Aesara tests should pass (skipped tests and known failures are normal). If some test fails on your machine, you are encouraged to tell us what went -wrong on the ``theano-users@googlegroups.com`` mailing list. +wrong in the GitHub issues. .. warning:: - Theano's test should **NOT** be run with ``device=cuda`` + Aesara's test should **NOT** be run with ``device=cuda`` or they will fail. The tests automatically use the gpu, if any, when - needed. If you don't want Theano to ever use the gpu when running tests, + needed. If you don't want Aesara to ever use the gpu when running tests, you can set :attr:`config.device` to ``cpu`` and :attr:`config.force_device` to ``True``. @@ -139,7 +139,7 @@ CPU and GPU memory usage. Could speed up and lower memory usage: - :ref:`cuDNN ` default cuDNN convolution use less - memory then Theano version. But some flags allow it to use more + memory then Aesara version. But some flags allow it to use more memory. GPU only. Could raise memory usage but speed up computation: @@ -156,8 +156,8 @@ Could lower the memory usage, but raise computation time: - :attr:`config.scan__allow_gc` = True # Probably not significant slowdown on the GPU if memory cache is not disabled - :attr:`config.scan__allow_output_prealloc` =False - Use :func:`batch_normalization() - `. It use less memory - then building a corresponding Theano graph. + `. It use less memory + then building a corresponding Aesara graph. - Disable one or scan more optimizations: - ``optimizer_excluding=scanOp_pushout_seqs_ops`` - ``optimizer_excluding=scan_pushout_dot1`` @@ -167,27 +167,27 @@ Could lower the memory usage, but raise computation time: - `float16 `_. 
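To see in isolation the float32/int64 upcasting behaviour that the ``warn_float64`` flag is meant to catch (described above), here is a small sketch; the constructor names are the standard ``aesara.tensor`` ones:

.. code-block:: python

    import aesara.tensor as tt

    x = tt.fvector('x')   # float32 vector
    i = tt.lscalar('i')   # int64 scalar
    z = x * i
    # Mixing float32 with int{32, 64} upcasts the result to float64.
    print(z.dtype)        # 'float64'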
If you want to analyze the memory usage during computation, the -simplest is to let the memory error happen during Theano execution and -use the Theano flags :attr:`exception_verbosity=high`. +simplest is to let the memory error happen during Aesara execution and +use the Aesara flags :attr:`exception_verbosity=high`. .. _test_BLAS: How do I configure/test my BLAS library ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -There are many ways to configure BLAS for Theano. This is done with the Theano +There are many ways to configure BLAS for Aesara. This is done with the Aesara flags ``blas__ldflags`` (:ref:`libdoc_config`). The default is to use the BLAS installation information in NumPy, accessible via -``numpy.distutils.__config__.show()``. You can tell theano to use a different +``numpy.distutils.__config__.show()``. You can tell aesara to use a different version of BLAS, in case you did not compile NumPy with a fast BLAS or if NumPy was compiled with a static library of BLAS (the latter is not supported in -Theano). +Aesara). -The short way to configure the Theano flags ``blas__ldflags`` is by setting the -environment variable :envvar:`THEANO_FLAGS` to ``blas__ldflags=XXX`` (in bash -``export THEANO_FLAGS=blas__ldflags=XXX``) +The short way to configure the Aesara flags ``blas__ldflags`` is by setting the +environment variable :envvar:`AESARA_FLAGS` to ``blas__ldflags=XXX`` (in bash +``export AESARA_FLAGS=blas__ldflags=XXX``) -The ``${HOME}/.theanorc`` file is the simplest way to set a relatively +The ``${HOME}/.aesararc`` file is the simplest way to set a relatively permanent option like this one. Add a ``[blas]`` section with an ``ldflags`` entry like this: @@ -199,7 +199,7 @@ entry like this: # other stuff can go here -For more information on the formatting of ``~/.theanorc`` and the +For more information on the formatting of ``~/.aesararc`` and the configuration options that you can put there, see :ref:`libdoc_config`. Here are some different way to configure BLAS: @@ -211,14 +211,14 @@ static library). 1) Disable the usage of BLAS and fall back on NumPy for dot products. To do this, set the value of ``blas__ldflags`` as the empty string (ex: ``export -THEANO_FLAGS=blas__ldflags=``). Depending on the kind of matrix operations your -Theano code performs, this might slow some things down (vs. linking with BLAS +AESARA_FLAGS=blas__ldflags=``). Depending on the kind of matrix operations your +Aesara code performs, this might slow some things down (vs. linking with BLAS directly). 2) You can install the default (reference) version of BLAS if the NumPy version -(against which Theano links) does not work. If you have root or sudo access in +(against which Aesara links) does not work. If you have root or sudo access in fedora you can do ``sudo yum install blas blas-devel``. Under Ubuntu/Debian -``sudo apt-get install libblas-dev``. Then use the Theano flags +``sudo apt-get install libblas-dev``. Then use the Aesara flags ``blas__ldflags=-lblas``. Note that the default version of blas is not optimized. Using an optimized version can give up to 10x speedups in the BLAS functions that we use. @@ -230,11 +230,11 @@ speed-ups of up to 3x, especially on more recent computers, against the precompiled one). On Fedora, ``sudo yum install atlas-devel``. Under Ubuntu, ``sudo apt-get install libatlas-base-dev libatlas-base`` or ``libatlas3gf-sse2`` if your CPU supports SSE2 instructions. Then set the -Theano flags ``blas__ldflags`` to ``-lf77blas -latlas -lgfortran``. 
Note that +Aesara flags ``blas__ldflags`` to ``-lf77blas -latlas -lgfortran``. Note that these flags are sometimes OS-dependent. 4) Use a faster version like MKL, GOTO, ... You are on your own to install it. -See the doc of that software and set the Theano flags ``blas__ldflags`` +See the doc of that software and set the Aesara flags ``blas__ldflags`` correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or ``-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lguide -liomp5 -lmkl_mc -lpthread``). @@ -243,13 +243,13 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or Make sure your BLAS libraries are available as dynamically-loadable libraries. - ATLAS is often installed only as a static library. Theano is not able to + ATLAS is often installed only as a static library. Aesara is not able to use this static library. Your ATLAS installation might need to be modified to provide dynamically loadable libraries. (On Linux this typically means a library whose name ends with .so. On Windows this will be a .dll, and on OS-X it might be either a .dylib or a .so.) - This might be just a problem with the way Theano passes compilation + This might be just a problem with the way Aesara passes compilation arguments to g++, but the problem is not fixed yet. .. note:: @@ -264,41 +264,41 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or If you have error that contain "gfortran" in it, like this one: - ImportError: ('/home/Nick/.theano/compiledir_Linux-2.6.35-31-generic-x86_64-with-Ubuntu-10.10-maverick--2.6.6/tmpIhWJaI/0c99c52c82f7ddc775109a06ca04b360.so: undefined symbol: _gfortran_st_write_done' + ImportError: ('/home/Nick/.aesara/compiledir_Linux-2.6.35-31-generic-x86_64-with-Ubuntu-10.10-maverick--2.6.6/tmpIhWJaI/0c99c52c82f7ddc775109a06ca04b360.so: undefined symbol: _gfortran_st_write_done' The problem is probably that NumPy is linked with a different blas then then one currently available (probably ATLAS). There is 2 possible fixes: 1) Uninstall ATLAS and install OpenBLAS. - 2) Use the Theano flag "blas__ldflags=-lblas -lgfortran" + 2) Use the Aesara flag "blas__ldflags=-lblas -lgfortran" 1) is better as OpenBLAS is faster then ATLAS and NumPy is probably already linked with it. So you won't need any other - change in Theano files or Theano configuration. + change in Aesara files or Aesara configuration. Testing BLAS ------------ -It is recommended to test your Theano/BLAS integration. There are many versions +It is recommended to test your Aesara/BLAS integration. There are many versions of BLAS that exist and there can be up to 10x speed difference between them. -Also, having Theano link directly against BLAS instead of using NumPy/SciPy as +Also, having Aesara link directly against BLAS instead of using NumPy/SciPy as an intermediate layer reduces the computational overhead. This is important for BLAS calls to ``ger``, ``gemv`` and small ``gemm`` operations (automatically called when needed when you use ``dot()``). To run the -Theano/BLAS speed test: +Aesara/BLAS speed test: .. code-block:: bash - python `python -c "import os, theano; print(os.path.dirname(theano.__file__))"`/misc/check_blas.py + python `python -c "import os, aesara; print(os.path.dirname(aesara.__file__))"`/misc/check_blas.py This will print a table with different versions of BLAS/numbers of -threads on multiple CPUs and GPUs. It will also print some Theano/NumPy +threads on multiple CPUs and GPUs. 
It will also print some Aesara/NumPy configuration information. Then, it will print the running time of the same benchmarks for your installation. Try to find a CPU similar to yours in the table, and check that the single-threaded timings are roughly the same. -Theano should link to a parallel version of Blas and use all cores +Aesara should link to a parallel version of Blas and use all cores when possible. By default it should use all cores. Set the environment variable "OMP_NUM_THREADS=N" to specify to use N threads. @@ -308,14 +308,14 @@ variable "OMP_NUM_THREADS=N" to specify to use N threads. Mac OS ------ -Although the above steps should be enough, running Theano on a Mac may +Although the above steps should be enough, running Aesara on a Mac may sometimes cause unexpected crashes, typically due to multiple versions of Python or other system libraries. If you encounter such problems, you may try the following. - You can ensure MacPorts shared libraries are given priority at run-time with ``export LD_LIBRARY_PATH=/opt/local/lib:$LD_LIBRARY_PATH``. In order - to do the same at compile time, you can add to your ``~/.theanorc``: + to do the same at compile time, you can add to your ``~/.aesararc``: .. code-block:: cfg @@ -323,13 +323,5 @@ try the following. cxxflags = -L/opt/local/lib - More generally, to investigate libraries issues, you can use the ``otool -L`` - command on ``.so`` files found under your ``~/.theano`` directory. This will + command on ``.so`` files found under your ``~/.aesara`` directory. This will list shared libraries dependencies, and may help identify incompatibilities. - -.. _theano-users: http://groups.google.com/group/theano-users?pli=1 - -Please inform us if you have trouble installing and running Theano on your Mac. -We would be especially interested in dependencies that we missed listing, -alternate installation steps, GPU instructions, as well as tests that fail on -your platform (use the ``theano-users@googlegroups.com`` mailing list, but -note that you must first register to it, by going to `theano-users`_). diff --git a/doc/tutorial/adding.txt b/doc/tutorial/adding.txt index b8aea553f4..cda9c1c213 100644 --- a/doc/tutorial/adding.txt +++ b/doc/tutorial/adding.txt @@ -7,13 +7,13 @@ Baby Steps - Algebra Adding two Scalars ================== -To get us started with Theano and get a feel of what we're working with, +To get us started with Aesara and get a feel of what we're working with, let's make a simple function: add two numbers together. Here is how you do it: >>> import numpy ->>> import theano.tensor as tt ->>> from theano import function +>>> import aesara.tensor as tt +>>> from aesara import function >>> x = tt.dscalar('x') >>> y = tt.dscalar('y') >>> z = x + y @@ -41,16 +41,16 @@ instruction. Behind the scene, *f* was being compiled into C code. .. note: A *Variable* is the main data structure you work with when - using Theano. The symbolic inputs that you operate on are + using Aesara. The symbolic inputs that you operate on are *Variables* and what you get from applying various operations to these inputs are also *Variables*. For example, when I type - >>> x = theano.tensor.ivector() + >>> x = aesara.tensor.ivector() >>> y = -x *x* and *y* are both Variables, i.e. instances of the - ``theano.graph.basic.Variable`` class. The - type of both *x* and *y* is ``theano.tensor.ivector``. + ``aesara.graph.basic.Variable`` class. The + type of both *x* and *y* is ``aesara.tensor.ivector``. **Step 1** @@ -58,18 +58,18 @@ instruction. 
Behind the scene, *f* was being compiled into C code. >>> x = tt.dscalar('x') >>> y = tt.dscalar('y') -In Theano, all symbols must be typed. In particular, ``tt.dscalar`` +In Aesara, all symbols must be typed. In particular, ``tt.dscalar`` is the type we assign to "0-dimensional arrays (`scalar`) of doubles -(`d`)". It is a Theano :ref:`type`. +(`d`)". It is an Aesara :ref:`type`. ``dscalar`` is not a class. Therefore, neither *x* nor *y* are actually instances of ``dscalar``. They are instances of :class:`TensorVariable`. *x* and *y* -are, however, assigned the theano Type ``dscalar`` in their ``type`` +are, however, assigned the aesara Type ``dscalar`` in their ``type`` field, as you can see here: >>> type(x) - + >>> x.type TensorType(float64, scalar) >>> tt.dscalar @@ -82,7 +82,7 @@ By calling ``tt.dscalar`` with a string argument, you create a given name. If you provide no argument, the symbol will be unnamed. Names are not required, but they can help debugging. -More will be said in a moment regarding Theano's inner structure. You +More will be said in a moment regarding Aesara's inner structure. You could also learn more by looking into :ref:`graphstructures`. @@ -96,7 +96,7 @@ The second step is to combine *x* and *y* into their sum *z*: *x* and *y*. You can use the :ref:`pp ` function to pretty-print out the computation associated to *z*. ->>> from theano import pp +>>> from aesara import pp >>> print(pp(z)) (x + y) @@ -117,21 +117,21 @@ then be used like a normal Python function. .. note:: As a shortcut, you can skip step 3, and just use a variable's - :func:`eval ` method. + :func:`eval ` method. The :func:`eval` method is not as flexible as :func:`function` but it can do everything we've covered in the tutorial so far. It has the added benefit of not requiring you to import :func:`function` . Here is how :func:`eval` works: >>> import numpy - >>> import theano.tensor as tt + >>> import aesara.tensor as tt >>> x = tt.dscalar('x') >>> y = tt.dscalar('y') >>> z = x + y >>> numpy.allclose(z.eval({x : 16.3, y : 12.1}), 28.4) True - We passed :func:`eval` a dictionary mapping symbolic theano + We passed :func:`eval` a dictionary mapping symbolic aesara variables to the values to substitute for them, and it returned the numerical value of the expression. @@ -199,10 +199,10 @@ Exercise .. testcode:: - import theano - a = theano.tensor.vector() # declare variable + import aesara + a = aesara.tensor.vector() # declare variable out = a + a ** 10 # build symbolic expression - f = theano.function([a], out) # compile function + f = aesara.function([a], out) # compile function print(f([0, 1, 2])) .. testoutput:: diff --git a/doc/tutorial/adding_solution_1.py b/doc/tutorial/adding_solution_1.py index 142f756c05..de8286596d 100755 --- a/doc/tutorial/adding_solution_1.py +++ b/doc/tutorial/adding_solution_1.py @@ -1,11 +1,11 @@ #!/usr/bin/env python -# Theano tutorial +# Aesara tutorial # Solution to Exercise in section 'Baby Steps - Algebra' -import theano -a = theano.tensor.vector() # declare variable -b = theano.tensor.vector() # declare variable +import aesara +a = aesara.tensor.vector() # declare variable +b = aesara.tensor.vector() # declare variable out = a ** 2 + b ** 2 + 2 * a * b # build symbolic expression -f = theano.function([a, b], out) # compile function +f = aesara.function([a, b], out) # compile function print(f([1, 2], [4, 5])) # prints [ 25. 49.] 
diff --git a/doc/tutorial/aliasing.txt b/doc/tutorial/aliasing.txt index 87b81c0cc8..6b7cb47824 100644 --- a/doc/tutorial/aliasing.txt +++ b/doc/tutorial/aliasing.txt @@ -5,11 +5,11 @@ Understanding Memory Aliasing for Speed and Correctness ======================================================= -The aggressive reuse of memory is one of the ways through which Theano makes code fast, and +The aggressive reuse of memory is one of the ways through which Aesara makes code fast, and it is important for the correctness and speed of your program that you understand -how Theano might alias buffers. +how Aesara might alias buffers. -This section describes the principles based on which Theano handles memory, and explains +This section describes the principles based on which Aesara handles memory, and explains when you might want to alter the default behaviour of some functions and methods for faster performance. @@ -17,32 +17,32 @@ methods for faster performance. The Memory Model: Two Spaces ============================ -There are some simple principles that guide Theano's handling of memory. The -main idea is that there is a pool of memory managed by Theano, and Theano tracks +There are some simple principles that guide Aesara's handling of memory. The +main idea is that there is a pool of memory managed by Aesara, and Aesara tracks changes to values in that pool. -* Theano manages its own memory space, which typically does not overlap with - the memory of normal Python variables that non-Theano code creates. +* Aesara manages its own memory space, which typically does not overlap with + the memory of normal Python variables that non-Aesara code creates. -* Theano functions only modify buffers that are in Theano's memory space. +* Aesara functions only modify buffers that are in Aesara's memory space. -* Theano's memory space includes the buffers allocated to store ``shared`` +* Aesara's memory space includes the buffers allocated to store ``shared`` variables and the temporaries used to evaluate functions. -* Physically, Theano's memory space may be spread across the host, a GPU +* Physically, Aesara's memory space may be spread across the host, a GPU device(s), and in the future may even include objects on a remote machine. * The memory allocated for a ``shared`` variable buffer is unique: it is never aliased to another ``shared`` variable. -* Theano's managed memory is constant while Theano functions are not running - and Theano's library code is not running. +* Aesara's managed memory is constant while Aesara functions are not running + and Aesara's library code is not running. * The default behaviour of a function is to return user-space values for outputs, and to expect user-space values for inputs. -The distinction between Theano-managed memory and user-managed memory can be -broken down by some Theano functions (e.g. ``shared``, ``get_value`` and the +The distinction between Aesara-managed memory and user-managed memory can be +broken down by some Aesara functions (e.g. ``shared``, ``get_value`` and the constructors for ``In`` and ``Out``) by using a ``borrow=True`` flag. This can make those methods faster (by avoiding copy operations) at the expense of risking subtle bugs in the overall program (by aliasing memory). @@ -57,12 +57,12 @@ A ``borrow`` argument can be provided to the shared-variable constructor. .. 
testcode:: borrow - import numpy, theano + import numpy, aesara np_array = numpy.ones(2, dtype='float32') - s_default = theano.shared(np_array) - s_false = theano.shared(np_array, borrow=False) - s_true = theano.shared(np_array, borrow=True) + s_default = aesara.shared(np_array) + s_false = aesara.shared(np_array, borrow=False) + s_true = aesara.shared(np_array, borrow=True) By default (*s_default*) and when explicitly setting ``borrow=False``, the shared variable we construct gets a [deep] copy of *np_array*. So changes we @@ -91,9 +91,9 @@ object as it's internal buffer. However, this aliasing of *np_array* and *s_true* is not guaranteed to occur, and may occur only temporarily even if it occurs at all. -It is not guaranteed to occur because if Theano is using a GPU device, then the +It is not guaranteed to occur because if Aesara is using a GPU device, then the ``borrow`` flag has no effect. It may occur only temporarily because -if we call a Theano function that updates the value of *s_true* the aliasing +if we call an Aesara function that updates the value of *s_true* the aliasing relationship *may* or *may not* be broken (the function is allowed to update the ``shared`` variable by modifying its buffer, which will preserve the aliasing, or by changing which buffer the variable points to, which @@ -122,25 +122,25 @@ retrieved. .. testcode:: borrow - s = theano.shared(np_array) + s = aesara.shared(np_array) v_false = s.get_value(borrow=False) # N.B. borrow default is False v_true = s.get_value(borrow=True) When ``borrow=False`` is passed to ``get_value``, it means that the return value -may not be aliased to any part of Theano's internal memory. +may not be aliased to any part of Aesara's internal memory. When ``borrow=True`` is passed to ``get_value``, it means that the return value -*might* be aliased to some of Theano's internal memory. +*might* be aliased to some of Aesara's internal memory. But both of these calls might create copies of the internal memory. The reason that ``borrow=True`` might still make a copy is that the internal representation of a ``shared`` variable might not be what you expect. When you create a ``shared`` variable by passing a NumPy array for example, then ``get_value()`` -must return a NumPy array too. That's how Theano can make the GPU use +must return a NumPy array too. That's how Aesara can make the GPU use transparent. But when you are using a GPU (or in the future perhaps a remote machine), then the numpy.ndarray is not the internal representation of your data. -If you really want Theano to return its internal representation *and never copy it* +If you really want Aesara to return its internal representation *and never copy it* then you should use the ``return_internal_type=True`` argument to ``get_value``. It will never cast the internal object (always return in constant time), but might return various datatypes depending on contextual @@ -154,7 +154,7 @@ It is possible to use ``borrow=False`` in conjunction with ``return_internal_type=True``, which will return a deep copy of the internal object. This is primarily for internal debugging, not for typical use. -For the transparent use of different type of optimization Theano can make, +For the transparent use of different type of optimization Aesara can make, there is the policy that ``get_value()`` always return by default the same object type it received when the ``shared`` variable was created. 
So if you created manually data on the gpu and create a ``shared`` variable on the gpu with this data, ``get_value`` will always @@ -173,7 +173,7 @@ Assigning ``Shared`` variables also have a ``set_value`` method that can accept an optional ``borrow=True`` argument. The semantics are similar to those of creating a new ``shared`` variable - ``borrow=False`` is the default and ``borrow=True`` means -that Theano *may* reuse the buffer you provide as the internal storage for the variable. +that Aesara *may* reuse the buffer you provide as the internal storage for the variable. A standard pattern for manually updating the value of a ``shared`` variable is as follows: @@ -190,29 +190,29 @@ follows: borrow=True) This pattern works regardless of the computing device, and when the latter -makes it possible to expose Theano's internal variables without a copy, then it +makes it possible to expose Aesara's internal variables without a copy, then it proceeds as fast as an in-place update. When ``shared`` variables are allocated on the GPU, the transfers to and from the GPU device memory can be costly. Here are a few tips to ensure fast and efficient use of GPU memory and bandwidth: -* Prior to Theano 0.3.1, ``set_value`` did not work in-place on the GPU. This meant that, sometimes, +* Prior to Aesara 0.3.1, ``set_value`` did not work in-place on the GPU. This meant that, sometimes, GPU memory for the new value would be allocated before the old memory was released. If you're running near the limits of GPU memory, this could cause you to run out of GPU memory unnecessarily. - *Solution*: update to a newer version of Theano. + *Solution*: update to a newer version of Aesara. * If you are going to swap several chunks of data in and out of a ``shared`` variable repeatedly, you will want to reuse the memory that you allocated the first time if possible - it is both faster and more memory efficient. - *Solution*: upgrade to a recent version of Theano (>0.3.0) and consider padding your source + *Solution*: upgrade to a recent version of Aesara (>0.3.0) and consider padding your source data to make sure that every chunk is the same size. * It is also worth mentioning that, current GPU copying routines - support only contiguous memory. So Theano must make the value you + support only contiguous memory. So Aesara must make the value you provide *C-contiguous* prior to copying it. This can require an extra copy of the data on the host. @@ -228,32 +228,32 @@ Borrowing when Constructing Function Objects ============================================ A ``borrow`` argument can also be provided to the ``In`` and ``Out`` objects -that control how ``theano.function`` handles its argument[s] and return value[s]. +that control how ``aesara.function`` handles its argument[s] and return value[s]. .. testcode:: - import theano - import theano.tensor as tt - from theano.compile.io import In, Out + import aesara + import aesara.tensor as tt + from aesara.compile.io import In, Out x = tt.matrix() y = 2 * x - f = theano.function([In(x, borrow=True)], Out(y, borrow=True)) + f = aesara.function([In(x, borrow=True)], Out(y, borrow=True)) -Borrowing an input means that Theano will treat the argument you provide as if -it were part of Theano's pool of temporaries. Consequently, your input +Borrowing an input means that Aesara will treat the argument you provide as if +it were part of Aesara's pool of temporaries. Consequently, your input may be reused as a buffer (and overwritten!) 
during the computation of other variables in the course of evaluating that function (e.g. ``f``). -Borrowing an output means that Theano will not insist on allocating a fresh +Borrowing an output means that Aesara will not insist on allocating a fresh output buffer every time you call the function. It will possibly reuse the same one as on a previous call, and overwrite the old content. Consequently, it may overwrite old return values through side-effect. Those return values may also be overwritten in the course of evaluating *another compiled function* (for example, the output may be aliased to a ``shared`` variable). So be careful to use a borrowed return -value right away before calling any more Theano functions. +value right away before calling any more Aesara functions. The default is of course to *not borrow* internal results. It is also possible to pass a ``return_internal_type=True`` flag to the ``Out`` @@ -267,7 +267,7 @@ graph. *Take home message:* When an input *x* to a function is not needed after the function -returns and you would like to make it available to Theano as +returns and you would like to make it available to Aesara as additional workspace, then consider marking it with ``In(x, borrow=True)``. It may make the function faster and reduce its memory requirement. When a return value *y* is large (in terms of memory diff --git a/doc/tutorial/broadcasting.txt b/doc/tutorial/broadcasting.txt index 816432a02c..bee9f3001b 100644 --- a/doc/tutorial/broadcasting.txt +++ b/doc/tutorial/broadcasting.txt @@ -1,8 +1,8 @@ .. testsetup:: import numpy as np - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt .. _tutbroadcasting: @@ -32,7 +32,7 @@ dimensions of the matrix (adding ``1`` to the shape and ``True`` to the pattern), resulting in ``(1, 2)`` and ``(True, False)``. It would then behave just like the example above. -Unlike numpy which does broadcasting dynamically, Theano needs +Unlike numpy which does broadcasting dynamically, Aesara needs to know, for any operation which supports broadcasting, which dimensions will need to be broadcasted. When applicable, this information is given in the :ref:`type` of a *Variable*. @@ -45,7 +45,7 @@ The following code illustrates how rows and columns are broadcasted in order to >>> mtr = tt.matrix() >>> mtr.broadcastable (False, False) ->>> f_row = theano.function([r, mtr], [r + mtr]) +>>> f_row = aesara.function([r, mtr], [r + mtr]) >>> R = np.arange(3).reshape(1, 3) >>> R array([[0, 1, 2]]) @@ -61,7 +61,7 @@ array([[0, 1, 2], >>> c = tt.col() >>> c.broadcastable (False, True) ->>> f_col = theano.function([c, mtr], [c + mtr]) +>>> f_col = aesara.function([c, mtr], [c + mtr]) >>> C = np.arange(3).reshape(3, 1) >>> C array([[0], diff --git a/doc/tutorial/conditions.txt b/doc/tutorial/conditions.txt index 7a49d0c33a..25c7856814 100644 --- a/doc/tutorial/conditions.txt +++ b/doc/tutorial/conditions.txt @@ -20,9 +20,9 @@ IfElse vs Switch .. 
testcode:: - from theano import tensor as tt - from theano.ifelse import ifelse - import theano, time, numpy + from aesara import tensor as tt + from aesara.ifelse import ifelse + import aesara, time, numpy a,b = tt.scalars('a', 'b') x,y = tt.matrices('x', 'y') @@ -30,10 +30,10 @@ IfElse vs Switch z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y)) z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y)) - f_switch = theano.function([a, b, x, y], z_switch, - mode=theano.compile.mode.Mode(linker='vm')) - f_lazyifelse = theano.function([a, b, x, y], z_lazy, - mode=theano.compile.mode.Mode(linker='vm')) + f_switch = aesara.function([a, b, x, y], z_switch, + mode=aesara.compile.mode.Mode(linker='vm')) + f_lazyifelse = aesara.function([a, b, x, y], z_lazy, + mode=aesara.compile.mode.Mode(linker='vm')) val1 = 0. val2 = 1. @@ -83,4 +83,4 @@ this `ticket `_. means everything will be computed when we build it, due to Python's greedy evaluation and the semantic of test value. As we build both branches, they will be executed for test values. This doesn't cause - any changes during the execution of the compiled Theano function. + any changes during the execution of the compiled Aesara function. diff --git a/doc/tutorial/conv_arithmetic.txt b/doc/tutorial/conv_arithmetic.txt index 64c13ac9d4..14edaf2c0f 100644 --- a/doc/tutorial/conv_arithmetic.txt +++ b/doc/tutorial/conv_arithmetic.txt @@ -8,7 +8,7 @@ Convolution arithmetic tutorial This tutorial is adapted from an existing `convolution arithmetic guide `_ [#]_, with an added emphasis on - Theano's interface. + Aesara's interface. Also, note that the signal processing community has a different nomenclature and a well established literature on the topic, but for this tutorial @@ -40,7 +40,7 @@ The tutorial's objective is threefold: * Provide an intuitive understanding of the relationship between input shape, kernel shape, zero padding, strides and output shape in convolutional and transposed convolutional layers. -* Clarify Theano's API on convolutions. +* Clarify Aesara's API on convolutions. Refresher: discrete convolutions ================================ @@ -167,14 +167,14 @@ simplified setting: This facilitates the analysis and the visualization, but keep in mind that the results outlined here also generalize to the N-D and non-square cases. -Theano terminology +Aesara terminology ================== -Theano has its own terminology, which differs slightly from the convolution +Aesara has its own terminology, which differs slightly from the convolution arithmetic guide's. Here's a simple conversion table for the two: +------------------+----------------------------------------------------------------------------------------------------+ -| Theano | Convolution arithmetic | +| Aesara | Convolution arithmetic | +==================+====================================================================================================+ | ``filters`` | 4D collection of kernels | +------------------+----------------------------------------------------------------------------------------------------+ @@ -188,11 +188,11 @@ arithmetic guide's. Here's a simple conversion table for the two: +------------------+----------------------------------------------------------------------------------------------------+ For instance, the convolution shown above would correspond to the following -Theano call: +Aesara call: .. 
code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(1, 1, 5, 5), filter_shape=(1, 1, 3, 3), border_mode=(1, 1), subsample=(2, 2)) @@ -226,11 +226,11 @@ More formally, the following relationship can be inferred: o = (i - k) + 1. - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(0, 0), subsample=(1, 1)) # output.shape[2] == (i1 - k1) + 1 @@ -253,11 +253,11 @@ relationship: o = (i - k) + 2p + 1. - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(1, 1)) # output.shape[2] == (i1 - k1) + 2 * p1 + 1 @@ -292,11 +292,11 @@ be a desirable property: &= i. \end{split} - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode='half', subsample=(1, 1)) # output.shape[2] == i1 @@ -332,11 +332,11 @@ proper zero padding: &= i + (k - 1). \end{split} - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode='full', subsample=(1, 1)) # output.shape[2] == i1 + (k1 - 1) @@ -380,11 +380,11 @@ From this, the following relationship can be inferred: o = \left\lfloor \frac{i - k}{s} \right\rfloor + 1. - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(0, 0), subsample=(s1, s2)) # output.shape[2] == (i1 - k1) // s1 + 1 @@ -409,11 +409,11 @@ strides) can be derived by applying Relationship 5 on an effective input of size o = \left\lfloor \frac{i + 2p - k}{s} \right\rfloor + 1. - This translates to the following Theano code: + This translates to the following Aesara code: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(s1, s2)) # output.shape[2] == (i1 - k1 + 2 * p1) // s1 + 1 @@ -627,7 +627,7 @@ for the other elements of the image, giving rise to the following relationship: .. code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, filter_shape=(c1, c2, k1, k2), border_mode=(0, 0), subsample=(1, 1)) # input.shape[2] == output.shape[2] + (k1 - 1) @@ -667,7 +667,7 @@ Formally, the following relationship applies for zero padded convolutions: .. 
code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(1, 1)) # input.shape[2] == output.shape[2] + (k1 - 1) - 2 * p1 @@ -704,7 +704,7 @@ applies: .. code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, filter_shape=(c1, c2, k1, k2), border_mode='half', subsample=(1, 1)) # input.shape[2] == output.shape[2] @@ -740,7 +740,7 @@ the transpose of a fully padded convolution is a non-padded convolution: .. code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, filter_shape=(c1, c2, k1, k2), border_mode='full', subsample=(1, 1)) # input.shape[2] == output.shape[2] - (k1 - 1) @@ -799,7 +799,7 @@ of :math:`s`. In that case, the following relationship holds: .. code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, filter_shape=(c1, c2, k1, k2), border_mode=(0, 0), subsample=(s1, s2)) # input.shape[2] == s1 * (output.shape[2] - 1) + k1 @@ -832,7 +832,7 @@ combining :ref:`Relationship 8 ` and o_prime1 = s1 * (output.shape[2] - 1) + k1 - 2 * p1 o_prime2 = s2 * (output.shape[3] - 1) + k2 - 2 * p2 - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, input_shape=(b, c1, o_prime1, o_prime2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(s1, s2)) @@ -867,7 +867,7 @@ between the :math:`s` different cases that all lead to the same :math:`i'`: o_prime1 = s1 * (output.shape[2] - 1) + a1 + k1 - 2 * p1 o_prime2 = s2 * (output.shape[3] - 1) + a2 + k2 - 2 * p2 - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, input_shape=(b, c1, o_prime1, o_prime2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(s1, s2)) @@ -919,12 +919,12 @@ dilated convolutions: o = \left\lfloor \frac{i + 2p - k - (k - 1)(d - 1)}{s} \right\rfloor + 1. - This translates to the following Theano code using the ``filter_dilation`` + This translates to the following Aesara code using the ``filter_dilation`` parameter: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(s1, s2), filter_dilation=(d1, d2)) # output.shape[2] == (i1 + 2 * p1 - k1 - (k1 - 1) * (d1 - 1)) // s1 + 1 @@ -963,7 +963,7 @@ An example to use Grouped convolutions would be: .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2 / n, k1, k2), border_mode=(p1, p2), subsample=(s1, s2), filter_dilation=(d1, d2), num_groups=n) # output.shape[0] == b @@ -1003,7 +1003,7 @@ Separable convolutions is used as follows: .. 
code-block:: python - output = theano.tensor.nnet.separable_conv2d( + output = aesara.tensor.nnet.separable_conv2d( input, depthwise_filters, pointwise_filters, num_channels = c1, input_shape=(b, c1, i1, i2), depthwise_filter_shape=(c2, 1, k1, k2), pointwise_filter_shape=(c3, c2, 1, 1), border_mode=(p1, p2), @@ -1040,11 +1040,11 @@ Quick reference o = \left\lfloor \frac{i + 2p - k}{s} \right\rfloor + 1. - In Theano, this translates to + In Aesara, this translates to .. code-block:: python - output = theano.tensor.nnet.conv2d( + output = aesara.tensor.nnet.conv2d( input, filters, input_shape=(b, c2, i1, i2), filter_shape=(c1, c2, k1, k2), border_mode=(p1, p2), subsample=(s1, s2)) # output.shape[2] == (i1 + 2 * p1 - k1) // s1 + 1 @@ -1068,13 +1068,13 @@ Quick reference where :math:`a` is a user-specified quantity used to distinguish between the :math:`s` different possible output sizes. - Unless :math:`s = 1`, Theano requires that :math:`a` is implicitly passed + Unless :math:`s = 1`, Aesara requires that :math:`a` is implicitly passed via an ``input_shape`` argument. For instance, if :math:`i = 3`, :math:`k = 4`, :math:`s = 2`, :math:`p = 0` and :math:`a = 1`, then - :math:`o = 2 (3 - 1) + 1 + 4 = 9` and the Theano code would look like + :math:`o = 2 (3 - 1) + 1 + 4 = 9` and the Aesara code would look like .. code-block:: python - input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( + input = aesara.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs( output, filters, input_shape=(9, 9), filter_shape=(c1, c2, 4, 4), border_mode='valid', subsample=(2, 2)) diff --git a/doc/tutorial/debug_faq.txt b/doc/tutorial/debug_faq.txt index 1813637766..179566905e 100644 --- a/doc/tutorial/debug_faq.txt +++ b/doc/tutorial/debug_faq.txt @@ -2,38 +2,38 @@ .. _debug_faq: ========================================= -Debugging Theano: FAQ and Troubleshooting +Debugging Aesara: FAQ and Troubleshooting ========================================= There are many kinds of bugs that might come up in a computer program. This page is structured as a FAQ. It provides recipes to tackle common problems, and introduces some of the tools that we use to find problems in our -own Theano code, and even (it happens) in Theano's internals, in +own Aesara code, and even (it happens) in Aesara's internals, in :ref:`using_debugmode`. -Isolating the Problem/Testing Theano Compiler +Isolating the Problem/Testing Aesara Compiler --------------------------------------------- -You can run your Theano function in a :ref:`DebugMode`. -This tests the Theano optimizations and helps to find where NaN, inf and other problems come from. +You can run your Aesara function in a :ref:`DebugMode`. +This tests the Aesara optimizations and helps to find where NaN, inf and other problems come from. Interpreting Error Messages --------------------------- -Even in its default configuration, Theano tries to display useful error +Even in its default configuration, Aesara tries to display useful error messages. Consider the following faulty code. .. 
testcode:: import numpy as np - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt x = tt.vector() y = tt.vector() z = x + x z = z + y - f = theano.function([x, y], z) + f = aesara.function([x, y], z) f(np.ones((2,)), np.ones((3,))) Running the code above we see: @@ -50,8 +50,8 @@ Running the code above we see: Inputs strides: [(8,), (8,), (8,)] Inputs scalar values: ['not scalar', 'not scalar', 'not scalar'] - HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'. - HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint of this apply node. + HINT: Re-running with most Aesara optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Aesara flags 'optimizer=fast_compile'. If that does not work, Aesara optimization can be disabled with 'optimizer=None'. + HINT: Use the Aesara flag 'exception_verbosity=high' for a debugprint of this apply node. Arguably the most useful information is approximately half-way through the error message, where the kind of error is displayed along with its @@ -61,7 +61,7 @@ Below it, some other information is given, such as the apply node that caused the error, as well as the input types, shapes, strides and scalar values. -The two hints can also be helpful when debugging. Using the theano flag +The two hints can also be helpful when debugging. Using the aesara flag ``optimizer=fast_compile`` or ``optimizer=None`` can often tell you the faulty line, while ``exception_verbosity=high`` will display a debugprint of the apply node. Using these hints, the end of the error @@ -89,11 +89,11 @@ you could set ``optimizer=None`` or use test values. Using Test Values ----------------- -As of v.0.4.0, Theano has a new mechanism by which graphs are executed -on-the-fly, before a ``theano.function`` is ever compiled. Since optimizations +As of v.0.4.0, Aesara has a new mechanism by which graphs are executed +on-the-fly, before a ``aesara.function`` is ever compiled. Since optimizations haven't been applied at this stage, it is easier for the user to locate the source of some bug. This functionality is enabled through the config flag -``theano.config.compute_test_value``. Its use is best shown through the +``aesara.config.compute_test_value``. Its use is best shown through the following example. Here, we use ``exception_verbosity=high`` and ``optimizer=fast_compile``, which would not tell you the line at fault. ``optimizer=None`` would and it could therefore be used instead of test values. @@ -102,24 +102,24 @@ following example. Here, we use ``exception_verbosity=high`` and .. 
testcode:: testvalue import numpy - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt # compute_test_value is 'off' by default, meaning this feature is inactive - theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature + aesara.config.compute_test_value = 'off' # Use 'warn' to activate this feature # configure shared variables - W1val = numpy.random.rand(2, 10, 10).astype(theano.config.floatX) - W1 = theano.shared(W1val, 'W1') - W2val = numpy.random.rand(15, 20).astype(theano.config.floatX) - W2 = theano.shared(W2val, 'W2') + W1val = numpy.random.rand(2, 10, 10).astype(aesara.config.floatX) + W1 = aesara.shared(W1val, 'W1') + W2val = numpy.random.rand(15, 20).astype(aesara.config.floatX) + W2 = aesara.shared(W2val, 'W2') # input which will be of shape (5,10) x = tt.matrix('x') - # provide Theano with a default test-value + # provide Aesara with a default test-value #x.tag.test_value = numpy.random.rand(5, 10) - # transform the shared variable in some way. Theano does not + # transform the shared variable in some way. Aesara does not # know off hand that the matrix func_of_W1 has shape (20, 10) func_of_W1 = W1.dimshuffle(2, 0, 1).flatten(2).T @@ -130,7 +130,7 @@ following example. Here, we use ``exception_verbosity=high`` and h2 = tt.dot(h1, W2.T) # compile and call the actual function - f = theano.function([x], h2) + f = aesara.function([x], h2) f(numpy.random.rand(5, 10)) Running the above code generates the following error message: @@ -140,9 +140,9 @@ Running the above code generates the following error message: Traceback (most recent call last): File "test1.py", line 31, in f(numpy.random.rand(5, 10)) - File "PATH_TO_THEANO/theano/compile/function/types.py", line 605, in __call__ + File "PATH_TO_AESARA/aesara/compile/function/types.py", line 605, in __call__ self.fn.thunks[self.fn.position_of_error]) - File "PATH_TO_THEANO/theano/compile/function/types.py", line 595, in __call__ + File "PATH_TO_AESARA/aesara/compile/function/types.py", line 595, in __call__ outputs = self.fn() ValueError: Shape mismatch: x has 10 cols (and 5 rows) but y has 20 rows (and 10 cols) Apply node that caused the error: Dot22(x, DimShuffle{1,0}.0) @@ -159,25 +159,25 @@ Running the above code generates the following error message: |DimShuffle{2,0,1} [id E] '' |W1 [id F] - HINT: Re-running with most Theano optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Theano flags 'optimizer=fast_compile'. If that does not work, Theano optimization can be disabled with 'optimizer=None'. + HINT: Re-running with most Aesara optimization disabled could give you a back-traces when this node was created. This can be done with by setting the Aesara flags 'optimizer=fast_compile'. If that does not work, Aesara optimization can be disabled with 'optimizer=None'. If the above is not informative enough, by instrumenting the code ever -so slightly, we can get Theano to reveal the exact source of the error. +so slightly, we can get Aesara to reveal the exact source of the error. .. code-block:: python # enable on-the-fly graph computations - theano.config.compute_test_value = 'warn' + aesara.config.compute_test_value = 'warn' ... # input which will be of shape (5, 10) x = tt.matrix('x') - # provide Theano with a default test-value + # provide Aesara with a default test-value x.tag.test_value = numpy.random.rand(5, 10) In the above, we are tagging the symbolic matrix *x* with a special test -value. 
This allows Theano to evaluate symbolic expressions on-the-fly (by +value. This allows Aesara to evaluate symbolic expressions on-the-fly (by calling the ``perform`` method of each op), as they are being defined. Sources of error can thus be identified with much more precision and much earlier in the compilation pipeline. For example, running the above code yields the @@ -188,22 +188,22 @@ following error message, which properly identifies *line 24* as the culprit. Traceback (most recent call last): File "test2.py", line 24, in h1 = tt.dot(x, func_of_W1) - File "PATH_TO_THEANO/theano/tensor/basic.py", line 4734, in dot + File "PATH_TO_AESARA/aesara/tensor/basic.py", line 4734, in dot return _dot(a, b) - File "PATH_TO_THEANO/theano/graph/op.py", line 545, in __call__ + File "PATH_TO_AESARA/aesara/graph/op.py", line 545, in __call__ required = thunk() - File "PATH_TO_THEANO/theano/graph/op.py", line 752, in rval + File "PATH_TO_AESARA/aesara/graph/op.py", line 752, in rval r = p(n, [x[0] for x in i], o) - File "PATH_TO_THEANO/theano/tensor/basic.py", line 4554, in perform + File "PATH_TO_AESARA/aesara/tensor/basic.py", line 4554, in perform z[0] = numpy.asarray(numpy.dot(x, y)) ValueError: matrices are not aligned The ``compute_test_value`` mechanism works as follows: -* Theano ``constants`` and ``shared`` variables are used as is. No need to instrument them. -* A Theano *variable* (i.e. ``dmatrix``, ``vector``, etc.) should be +* Aesara ``constants`` and ``shared`` variables are used as is. No need to instrument them. +* A Aesara *variable* (i.e. ``dmatrix``, ``vector``, etc.) should be given a special test value through the attribute ``tag.test_value``. -* Theano automatically instruments intermediate results. As such, any quantity +* Aesara automatically instruments intermediate results. As such, any quantity derived from *x* will be given a ``tag.test_value`` automatically. ``compute_test_value`` can take the following values: @@ -224,8 +224,8 @@ It is also possible to override variables ``__repr__`` method to have them retur .. testsetup:: printtestvalue - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt .. testcode:: printtestvalue @@ -235,11 +235,11 @@ It is also possible to override variables ``__repr__`` method to have them retur x.tag.test_value = 42 # Enable test value printing - theano.config.print_test_value = True + aesara.config.print_test_value = True print(x.__repr__()) # Disable test value printing - theano.config.print_test_value = False + aesara.config.print_test_value = False print(x.__repr__()) Running the code above returns the following output: @@ -254,19 +254,19 @@ Running the code above returns the following output: "How do I Print an Intermediate Value in a Function?" ----------------------------------------------------- -Theano provides a 'Print' op to do this. +Aesara provides a 'Print' op to do this. .. testcode:: import numpy - import theano + import aesara - x = theano.tensor.dvector('x') + x = aesara.tensor.dvector('x') - x_printed = theano.printing.Print('this is a very important value')(x) + x_printed = aesara.printing.Print('this is a very important value')(x) - f = theano.function([x], x * 5) - f_with_print = theano.function([x], x_printed * 5) + f = aesara.function([x], x * 5) + f_with_print = aesara.function([x], x_printed * 5) #this runs the graph without any printing assert numpy.all( f([1, 2, 3]) == [5, 10, 15]) @@ -278,14 +278,14 @@ Theano provides a 'Print' op to do this. 
this is a very important value __str__ = [ 1. 2. 3.] -Since Theano runs your program in a topological order, you won't have precise +Since Aesara runs your program in a topological order, you won't have precise control over the order in which multiple ``Print()`` ops are evaluated. For a more precise inspection of what's being computed where, when, and how, see the discussion :ref:`faq_monitormode`. .. warning:: - Using this ``Print`` Theano Op can prevent some Theano + Using this ``Print`` Aesara Op can prevent some Aesara optimization from being applied. This can also happen with stability optimization. So if you use this Print and have nan, try to remove them to know if this is the cause or not. @@ -296,11 +296,11 @@ precise inspection of what's being computed where, when, and how, see the discus .. TODO: dead links in the next paragraph -Theano provides two functions (:func:`theano.pp` and -:func:`theano.printing.debugprint`) to print a graph to the terminal before or after +Aesara provides two functions (:func:`aesara.pp` and +:func:`aesara.printing.debugprint`) to print a graph to the terminal before or after compilation. These two functions print expression graphs in different ways: :func:`pp` is more compact and math-like, :func:`debugprint` is more verbose. -Theano also provides :func:`theano.printing.pydotprint` that creates a png image of the function. +Aesara also provides :func:`aesara.printing.pydotprint` that creates a png image of the function. You can read about them in :ref:`libdoc_printing`. @@ -311,16 +311,16 @@ You can read about them in :ref:`libdoc_printing`. First, make sure you're running in ``FAST_RUN`` mode. Even though ``FAST_RUN`` is the default mode, insist by passing ``mode='FAST_RUN'`` -to ``theano.function`` (or ``theano.make``) or by setting :attr:`config.mode` +to ``aesara.function`` (or ``aesara.make``) or by setting :attr:`config.mode` to ``FAST_RUN``. -Second, try the Theano :ref:`profiling `. This will tell you which +Second, try the Aesara :ref:`profiling `. This will tell you which ``Apply`` nodes, and which ops are eating up your CPU cycles. Tips: * Use the flags ``floatX=float32`` to require type *float32* instead of *float64*; - Use the Theano constructors matrix(),vector(),... instead of dmatrix(), dvector(),... + Use the Aesara constructors matrix(),vector(),... instead of dmatrix(), dvector(),... since they respectively involve the default types *float32* and *float64*. * Check in the ``profile`` mode that there is no ``Dot`` op in the post-compilation graph while you are multiplying two matrices of the same type. ``Dot`` should be @@ -332,7 +332,7 @@ Tips: "Why does my GPU function seem to be slow?" ------------------------------------------- -When you compile a theano function, if you do not get the speedup that you expect over the +When you compile an Aesara function, if you do not get the speedup that you expect over the CPU performance of the same code. It is oftentimes due to the fact that some Ops might be running on CPU instead GPU. If that is the case, you can use assert_no_cpu_op to check if there is a CPU Op on your computational graph. assert_no_cpu_op can take the following one of the three @@ -343,8 +343,8 @@ options: * ``raise``: Raise an error, if there is a CPU Op in the computational graph. 
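Stepping back to the graph-printing helpers mentioned a few paragraphs above, here is a minimal sketch of how :func:`aesara.pp` and :func:`aesara.printing.debugprint` are typically called; the exact output format varies between versions, so treat it as illustrative only.

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    x = tt.dvector('x')
    y = tt.exp(x).sum()

    print(aesara.pp(y))              # compact, math-like rendering of the graph
    aesara.printing.debugprint(y)    # verbose, node-by-node rendering

    f = aesara.function([x], y)
    aesara.printing.debugprint(f)    # the optimized graph that actually runs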
-It is possible to use this mode by providing the flag in THEANO_FLAGS, such as: -``THEANO_FLAGS="float32,device=gpu,assert_no_cpu_op='raise'" python test.py`` +It is possible to use this mode by providing the flag in AESARA_FLAGS, such as: +``AESARA_FLAGS="float32,device=gpu,assert_no_cpu_op='raise'" python test.py`` But note that this optimization will not catch all the CPU Ops, it might miss some Ops. @@ -361,7 +361,7 @@ shows how to print all inputs and outputs: .. testcode:: from __future__ import print_function - import theano + import aesara def inspect_inputs(fgraph, i, node, fn): print(i, node, "input(s) value(s):", [input[0] for input in fn.inputs], @@ -370,9 +370,9 @@ shows how to print all inputs and outputs: def inspect_outputs(fgraph, i, node, fn): print(" output(s) value(s):", [output[0] for output in fn.outputs]) - x = theano.tensor.dscalar('x') - f = theano.function([x], [5 * x], - mode=theano.compile.MonitorMode( + x = aesara.tensor.dscalar('x') + f = aesara.function([x], [5 * x], + mode=aesara.compile.MonitorMode( pre_func=inspect_inputs, post_func=inspect_outputs)) f(3) @@ -398,12 +398,12 @@ can be achieved as follows: import numpy - import theano + import aesara # This is the current suggested detect_nan implementation to # show you how it work. That way, you can modify it for your # need. If you want exactly this method, you can use - # ``theano.compile.monitormode.detect_nan`` that will always + # ``aesara.compile.monitormode.detect_nan`` that will always # contain the current suggested version. def detect_nan(fgraph, i, node, fn): @@ -411,14 +411,14 @@ can be achieved as follows: if (not isinstance(output[0], numpy.random.RandomState) and numpy.isnan(output[0]).any()): print('*** NaN detected ***') - theano.printing.debugprint(node) + aesara.printing.debugprint(node) print('Inputs : %s' % [input[0] for input in fn.inputs]) print('Outputs: %s' % [output[0] for output in fn.outputs]) break - x = theano.tensor.dscalar('x') - f = theano.function([x], [theano.tensor.log(x) * x], - mode=theano.compile.MonitorMode( + x = aesara.tensor.dscalar('x') + f = aesara.function([x], [aesara.tensor.log(x) * x], + mode=aesara.compile.MonitorMode( post_func=detect_nan)) f(0) # log(0) * 0 = -inf * 0 = NaN @@ -438,19 +438,19 @@ operations together. This makes it harder to know which particular elemwise causes the problem. The second optimization makes some ops' outputs overwrite their inputs. So, if an op creates a bad output, you will not be able to see the input that was overwritten in the ``post_func`` -function. To disable those optimizations (with a Theano version after +function. To disable those optimizations (with an Aesara version after 0.6rc3), define the MonitorMode like this: .. testcode:: compiled - mode = theano.compile.MonitorMode(post_func=detect_nan).excluding( + mode = aesara.compile.MonitorMode(post_func=detect_nan).excluding( 'local_elemwise_fusion', 'inplace') - f = theano.function([x], [theano.tensor.log(x) * x], + f = aesara.function([x], [aesara.tensor.log(x) * x], mode=mode) .. note:: - The Theano flags ``optimizer_including``, ``optimizer_excluding`` + The Aesara flags ``optimizer_including``, ``optimizer_excluding`` and ``optimizer_requiring`` aren't used by the MonitorMode, they are used only by the ``default`` mode. You can't use the ``default`` mode with MonitorMode, as you need to define what you monitor. @@ -458,7 +458,7 @@ function. 
To disable those optimizations (with a Theano version after To be sure all inputs of the node are available during the call to ``post_func``, you must also disable the garbage collector. Otherwise, the execution of the node can garbage collect its inputs that aren't -needed anymore by the Theano function. This can be done with the Theano +needed anymore by the Aesara function. This can be done with the Aesara flag: .. code-block:: python @@ -483,14 +483,14 @@ Consider this example script ("ex.py"): .. testcode:: - import theano + import aesara import numpy - import theano.tensor as tt + import aesara.tensor as tt a = tt.dmatrix('a') b = tt.dmatrix('b') - f = theano.function([a, b], [a * b]) + f = aesara.function([a, b], [a * b]) # matrices chosen so dimensions are unsuitable for multiplication mat1 = numpy.arange(12).reshape((3, 4)) @@ -515,9 +515,9 @@ Consider this example script ("ex.py"): Backtrace when the node is created: File "", line 8, in - f = theano.function([a, b], [a * b]) + f = aesara.function([a, b], [a * b]) - HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node. + HINT: Use the Aesara flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node. This is actually so simple the debugging could be done easily, but it's for illustrative purposes. As the matrices can't be multiplied element-wise @@ -527,10 +527,10 @@ illustrative purposes. As the matrices can't be multiplied element-wise File "ex.py", line 14, in f(mat1, mat2) - File "/u/username/Theano/theano/compile/function/types.py", line 451, in __call__ - File "/u/username/Theano/theano/graph/link.py", line 271, in streamline_default_f - File "/u/username/Theano/theano/graph/link.py", line 267, in streamline_default_f - File "/u/username/Theano/theano/graph/cc.py", line 1049, in execute ValueError: ('Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)', Elemwise{mul,no_inplace}(a, b), Elemwise{mul,no_inplace}(a, b)) + File "/u/username/Aesara/aesara/compile/function/types.py", line 451, in __call__ + File "/u/username/Aesara/aesara/graph/link.py", line 271, in streamline_default_f + File "/u/username/Aesara/aesara/graph/link.py", line 267, in streamline_default_f + File "/u/username/Aesara/aesara/graph/cc.py", line 1049, in execute ValueError: ('Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 5)', Elemwise{mul,no_inplace}(a, b), Elemwise{mul,no_inplace}(a, b)) The call stack contains some useful information to trace back the source of the error. There's the script where the compiled function was called -- @@ -540,7 +540,7 @@ tells us about the op that caused the exception. In this case it's a "mul" involving variables with names "a" and "b". But suppose we instead had an intermediate result to which we hadn't given a name. -After learning a few things about the graph structure in Theano, we can use +After learning a few things about the graph structure in Aesara, we can use the Python debugger to explore the graph, and then we can get runtime information about the error. Matrix dimensions, especially, are useful to pinpoint the source of the error. 
In the printout, there are also 2 of the 4 @@ -552,7 +552,7 @@ the debugger module and run the program with "c": python -m pdb ex.py > /u/username/experiments/doctmp1/ex.py(1)() - -> import theano + -> import aesara (Pdb) c Then we get back the above error printout, but the interpreter breaks in @@ -583,31 +583,31 @@ Dumping a Function to help debug If you are reading this, there is high chance that you emailed our mailing list and we asked you to read this section. This section explain how to dump all the parameter passed to -``theano.function()``. This is useful to help us reproduce a problem +``aesara.function()``. This is useful to help us reproduce a problem during compilation and it doesn't request you to make a self contained example. For this to work, we need to be able to import the code for all Op in the graph. So if you create your own Op, we will need this code. Otherwise, we won't be able to unpickle it. We already have all -the Ops from Theano and Pylearn2. +the Ops from Aesara and Pylearn2. .. code-block:: python # Replace this line: - theano.function(...) + aesara.function(...) # with - theano.function_dump(filename, ...) + aesara.function_dump(filename, ...) # Where filename is a string to a file that we will write to. Then send us filename. -Breakpoint during Theano function execution +Breakpoint during Aesara function execution ------------------------------------------- -You can set a breakpoint during the execution of a Theano function with -:class:`PdbBreakpoint `. -:class:`PdbBreakpoint ` automatically +You can set a breakpoint during the execution of an Aesara function with +:class:`PdbBreakpoint `. +:class:`PdbBreakpoint ` automatically detects available debuggers and uses the first available in the following order: `pudb`, `ipdb`, or `pdb`. diff --git a/doc/tutorial/examples.txt b/doc/tutorial/examples.txt index 77a13c8cff..65a0198938 100644 --- a/doc/tutorial/examples.txt +++ b/doc/tutorial/examples.txt @@ -6,7 +6,7 @@ More Examples ============= At this point it would be wise to begin familiarizing yourself more -systematically with Theano's fundamental objects and operations by +systematically with Aesara's fundamental objects and operations by browsing this section of the library: :ref:`libdoc_basic_tensor`. As the tutorial unfolds, you should also gradually acquaint yourself @@ -40,11 +40,11 @@ Well, what you do is this: .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_1 ->>> import theano ->>> import theano.tensor as tt +>>> import aesara +>>> import aesara.tensor as tt >>> x = tt.dmatrix('x') >>> s = 1 / (1 + tt.exp(-x)) ->>> logistic = theano.function([x], s) +>>> logistic = aesara.function([x], s) >>> logistic([[0, 1], [-1, -2]]) array([[ 0.5 , 0.73105858], [ 0.26894142, 0.11920292]]) @@ -65,7 +65,7 @@ We can verify that this alternate form produces the same values: .. tests/test_tutorial.py:T_examples.test_examples_2 >>> s2 = (1 + tt.tanh(x / 2)) / 2 ->>> logistic2 = theano.function([x], s2) +>>> logistic2 = aesara.function([x], s2) >>> logistic2([[0, 1], [-1, -2]]) array([[ 0.5 , 0.73105858], [ 0.26894142, 0.11920292]]) @@ -74,7 +74,7 @@ array([[ 0.5 , 0.73105858], Computing More than one Thing at the Same Time ============================================== -Theano supports functions with multiple outputs. For example, we can +Aesara supports functions with multiple outputs. 
For example, we can compute the :ref:`elementwise ` difference, absolute difference, and squared difference between two matrices *a* and *b* at the same time: @@ -85,7 +85,7 @@ squared difference between two matrices *a* and *b* at the same time: >>> diff = a - b >>> abs_diff = abs(diff) >>> diff_squared = diff**2 ->>> f = theano.function([a, b], [diff, abs_diff, diff_squared]) +>>> f = aesara.function([a, b], [diff, abs_diff, diff_squared]) .. note:: `dmatrices` produces as many outputs as names that you provide. It is a @@ -112,8 +112,8 @@ one. You can do it like this: .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_6 ->>> from theano.compile.io import In ->>> from theano import function +>>> from aesara.compile.io import In +>>> from aesara import function >>> x, y = tt.dscalars('x', 'y') >>> z = x + y >>> f = function([x, In(y, value=1)], z) @@ -178,7 +178,7 @@ internal state, and returns the old state value. .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_8 ->>> from theano import shared +>>> from aesara import shared >>> state = shared(0) >>> inc = tt.iscalar('inc') >>> accumulator = function([inc], state, updates=[(state, state+inc)]) @@ -242,7 +242,7 @@ achieve a similar result by returning the new expressions, and working with them in NumPy as usual. The updates mechanism can be a syntactic convenience, but it is mainly there for efficiency. Updates to shared variables can sometimes be done more quickly using in-place algorithms (e.g. low-rank matrix -updates). Also, Theano has more control over where and how shared variables are +updates). Also, Aesara has more control over where and how shared variables are allocated, which is one of the important elements of getting good performance on the :ref:`GPU`. @@ -276,26 +276,26 @@ expression that evaluates to a tensor of same shape and dtype. .. note:: - Theano shared variable broadcast pattern default to False for each + Aesara shared variable broadcast pattern default to False for each dimensions. Shared variable size can change over time, so we can't use the shape to find the broadcastable pattern. If you want a different pattern, just pass it as a parameter - ``theano.shared(..., broadcastable=(True, False))`` + ``aesara.shared(..., broadcastable=(True, False))`` Copying functions ================= -Theano functions can be copied, which can be useful for creating similar +Aesara functions can be copied, which can be useful for creating similar functions but with different shared variables or updates. This is done using -the :func:`copy()` method of ``function`` objects. The optimized graph of the original function is copied, +the :func:`copy()` method of ``function`` objects. The optimized graph of the original function is copied, so compilation only needs to be performed once. 
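Returning briefly to the broadcast-pattern note above, a minimal sketch of passing an explicit pattern when the shared variable is created (the shape and names here are only illustrative):

.. code-block:: python

    import numpy as np
    import aesara

    # a (1, 5) row whose first dimension is meant to broadcast
    row = aesara.shared(np.zeros((1, 5)), broadcastable=(True, False))
    print(row.broadcastable)  # (True, False)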
Let's start from the accumulator defined above: ->>> import theano ->>> import theano.tensor as tt ->>> state = theano.shared(0) +>>> import aesara +>>> import aesara.tensor as tt +>>> state = aesara.shared(0) >>> inc = tt.iscalar('inc') ->>> accumulator = theano.function([inc], state, updates=[(state, state+inc)]) +>>> accumulator = aesara.function([inc], state, updates=[(state, state+inc)]) We can use it to increment the state as usual: @@ -307,7 +307,7 @@ array(0) We can use ``copy()`` to create a similar accumulator but with its own internal state using the ``swap`` parameter, which is a dictionary of shared variables to exchange: ->>> new_state = theano.shared(0) +>>> new_state = aesara.shared(0) >>> new_accumulator = accumulator.copy(swap={state:new_state}) >>> new_accumulator(100) [array(0)] @@ -336,18 +336,18 @@ As expected, the shared state is no longer updated: Using Random Numbers ==================== -Because in Theano you first express everything symbolically and +Because in Aesara you first express everything symbolically and afterwards compile this expression to get functions, using pseudo-random numbers is not as straightforward as it is in NumPy, though also not too complicated. -The way to think about putting randomness into Theano's computations is -to put random variables in your graph. Theano will allocate a NumPy +The way to think about putting randomness into Aesara's computations is +to put random variables in your graph. Aesara will allocate a NumPy `RandomStream` object (a random number generator) for each such variable, and draw from it as necessary. We will call this sort of sequence of random numbers a *random stream*. *Random streams* are at their core shared variables, so the observations on shared variables -hold here as well. Theano's random objects are defined and implemented in +hold here as well. Aesara's random objects are defined and implemented in :ref:`RandomStream` and, at a lower level, in :ref:`RandomVariable`. @@ -361,8 +361,8 @@ Here's a brief example. The setup code is: .. testcode:: - from theano.tensor.random.utils import RandomStream - from theano import function + from aesara.tensor.random.utils import RandomStream + from aesara import function srng = RandomStream(seed=234) rv_u = srng.uniform(0, 1, size=(2,2)) rv_n = srng.normal(0, 1, size=(2,2)) @@ -439,33 +439,33 @@ array([[ 0., 0.], >>> v2 = f() # v2 != v1 >>> v3 = f() # v3 == v1 -Copying Random State Between Theano Graphs +Copying Random State Between Aesara Graphs ------------------------------------------ In some use cases, a user might want to transfer the "state" of all random -number generators associated with a given theano graph (e.g. g1, with compiled +number generators associated with a given aesara graph (e.g. g1, with compiled function f1 below) to a second graph (e.g. g2, with function f2). This might arise for example if you are trying to initialize the state of a model, from the parameters of a pickled version of a previous model. For -:class:`theano.tensor.random.utils.RandomStream` and -:class:`theano.sandbox.rng_mrg.MRG_RandomStream` +:class:`aesara.tensor.random.utils.RandomStream` and +:class:`aesara.sandbox.rng_mrg.MRG_RandomStream` this can be achieved by copying elements of the `state_updates` parameter. Each time a random variable is drawn from a `RandomStream` object, a tuple is added to the `state_updates` list. 
The first element is a shared variable, which represents the state of the random number generator associated with this -*particular* variable, while the second represents the theano graph +*particular* variable, while the second represents the aesara graph corresponding to the random number generation process (i.e. RandomFunction{uniform}.0). -An example of how "random states" can be transferred from one theano function +An example of how "random states" can be transferred from one aesara function to another is shown below. >>> from __future__ import print_function ->>> import theano +>>> import aesara >>> import numpy ->>> import theano.tensor as tt ->>> from theano.sandbox.rng_mrg import MRG_RandomStream ->>> from theano.tensor.random.utils import RandomStream +>>> import aesara.tensor as tt +>>> from aesara.sandbox.rng_mrg import MRG_RandomStream +>>> from aesara.tensor.random.utils import RandomStream >>> class Graph(): ... def __init__(self, seed=123): @@ -473,10 +473,10 @@ to another is shown below. ... self.y = self.rng.uniform(size=(1,)) >>> g1 = Graph(seed=123) ->>> f1 = theano.function([], g1.y) +>>> f1 = aesara.function([], g1.y) >>> g2 = Graph(seed=987) ->>> f2 = theano.function([], g2.y) +>>> f2 = aesara.function([], g2.y) >>> # By default, the two functions are out of sync. >>> f1() @@ -490,7 +490,7 @@ array([ 0.55056769]) ... for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates): ... su2[0].set_value(su1[0].get_value()) ->>> # We now copy the state of the theano random number generators. +>>> # We now copy the state of the aesara random number generators. >>> copy_random_state(g1, g2) >>> f1() array([ 0.59044123]) @@ -518,7 +518,7 @@ The `RandomStream` only work on the CPU, MRG31k3p work on the CPU and GPU. .. code-block:: python - from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream + from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream .. _logistic_regression: @@ -532,8 +532,8 @@ It will be used repeatedly. .. testcode:: import numpy - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt rng = numpy.random N = 400 # training sample size @@ -543,7 +543,7 @@ It will be used repeatedly. D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2)) training_steps = 10000 - # Declare Theano symbolic variables + # Declare Aesara symbolic variables x = tt.dmatrix("x") y = tt.dvector("y") @@ -552,16 +552,16 @@ It will be used repeatedly. # this and the following bias variable b # are shared so they keep their values # between training iterations (updates) - w = theano.shared(rng.randn(feats), name="w") + w = aesara.shared(rng.randn(feats), name="w") # initialize the bias term - b = theano.shared(0., name="b") + b = aesara.shared(0., name="b") print("Initial model:") print(w.get_value()) print(b.get_value()) - # Construct Theano expression graph + # Construct Aesara expression graph p_1 = 1 / (1 + tt.exp(-T.dot(x, w) - b)) # Probability that target = 1 prediction = p_1 > 0.5 # The prediction thresholded xent = -y * tt.log(p_1) - (1-y) * tt.log(1-p_1) # Cross-entropy loss function @@ -574,11 +574,11 @@ It will be used repeatedly. 
# tutorial) # Compile - train = theano.function( + train = aesara.function( inputs=[x,y], outputs=[prediction, xent], updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb))) - predict = theano.function(inputs=[x], outputs=prediction) + predict = aesara.function(inputs=[x], outputs=prediction) # Train for i in range(training_steps): diff --git a/doc/tutorial/extending_aesara.txt b/doc/tutorial/extending_aesara.txt new file mode 100644 index 0000000000..d9fc1f11a2 --- /dev/null +++ b/doc/tutorial/extending_aesara.txt @@ -0,0 +1,3 @@ +:orphan: + +This page has been moved. Please refer to: :ref:`extending_aesara`. diff --git a/doc/tutorial/extending_aesara_c.txt b/doc/tutorial/extending_aesara_c.txt new file mode 100644 index 0000000000..e693c9647b --- /dev/null +++ b/doc/tutorial/extending_aesara_c.txt @@ -0,0 +1,3 @@ +:orphan: + +This page has been moved. Please refer to: :ref:`extending_aesara_c`. diff --git a/doc/tutorial/extending_theano.txt b/doc/tutorial/extending_theano.txt deleted file mode 100644 index df4a0bad0f..0000000000 --- a/doc/tutorial/extending_theano.txt +++ /dev/null @@ -1,3 +0,0 @@ -:orphan: - -This page has been moved. Please refer to: :ref:`extending_theano`. diff --git a/doc/tutorial/extending_theano_c.txt b/doc/tutorial/extending_theano_c.txt deleted file mode 100644 index da0b468d1d..0000000000 --- a/doc/tutorial/extending_theano_c.txt +++ /dev/null @@ -1,3 +0,0 @@ -:orphan: - -This page has been moved. Please refer to: :ref:`extending_theano_c`. diff --git a/doc/tutorial/faq_tutorial.txt b/doc/tutorial/faq_tutorial.txt index fa74b9d18e..a686c68561 100644 --- a/doc/tutorial/faq_tutorial.txt +++ b/doc/tutorial/faq_tutorial.txt @@ -16,14 +16,14 @@ For example if you want to learn a lookup table, e.g. used for word embeddings, where each row is a vector of weights representing the embedding that the model has learned for a word, in each iteration, the only rows that should get updated are those containing embeddings -used during the forward propagation. Here is how the theano function +used during the forward propagation. Here is how the aesara function should be written: Defining a shared variable for the lookup table .. code-block:: python - lookup_table = theano.shared(matrix_ndarray) + lookup_table = aesara.shared(matrix_ndarray) Getting a subset of the table (some rows or some columns) by passing an integer vector of indices corresponding to those rows or columns. @@ -40,11 +40,11 @@ Defining cost which depends only on subset and not the entire lookup_table .. code-block:: python cost = something that depends on subset - g = theano.grad(cost, subset) + g = aesara.grad(cost, subset) There are two ways for updating the parameters: Either use inc_subtensor or set_subtensor. It is recommended to use -inc_subtensor. Some theano optimizations do the conversion between +inc_subtensor. Some aesara optimizations do the conversion between the two functions, but not in all cases. .. code-block:: python @@ -60,11 +60,11 @@ OR Currently we just cover the case here, not if you use inc_subtensor or set_subtensor with other types of indexing. -Defining the theano function +Defining the aesara function .. code-block:: python - f = theano.function(..., updates=[(lookup_table, updates)]) + f = aesara.function(..., updates=[(lookup_table, updates)]) Note that you can compute the gradient of the cost function w.r.t. 
the entire lookup_table, and the gradient will have nonzero rows only diff --git a/doc/tutorial/gradients.txt b/doc/tutorial/gradients.txt index a433774e0d..a60cd0a718 100644 --- a/doc/tutorial/gradients.txt +++ b/doc/tutorial/gradients.txt @@ -3,13 +3,13 @@ ===================== -Derivatives in Theano +Derivatives in Aesara ===================== Computing Gradients =================== -Now let's use Theano for a slightly more sophisticated task: create a +Now let's use Aesara for a slightly more sophisticated task: create a function which computes the derivative of some expression *y* with respect to its parameter *x*. To do this we will use the macro ``tt.grad``. For instance, we can compute the @@ -24,15 +24,15 @@ Here is the code to compute this gradient: .. tests/test_tutorial.py:T_examples.test_examples_4 >>> import numpy ->>> import theano ->>> import theano.tensor as tt ->>> from theano import pp +>>> import aesara +>>> import aesara.tensor as tt +>>> from aesara import pp >>> x = tt.dscalar('x') >>> y = x ** 2 >>> gy = tt.grad(y, x) >>> pp(gy) # print out the gradient prior to optimization '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))' ->>> f = theano.function([x], gy) +>>> f = aesara.function([x], gy) >>> f(4) array(8.0) >>> numpy.allclose(f(94.2), 188.4) @@ -71,14 +71,14 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`. >>> x = tt.dmatrix('x') >>> s = tt.sum(1 / (1 + tt.exp(-x))) >>> gs = tt.grad(s, x) ->>> dlogistic = theano.function([x], gs) +>>> dlogistic = aesara.function([x], gs) >>> dlogistic([[0, 1], [-1, -2]]) array([[ 0.25 , 0.19661193], [ 0.19661193, 0.10499359]]) In general, for any **scalar** expression *s*, ``tt.grad(s, w)`` provides -the Theano expression for computing :math:`\frac{\partial s}{\partial w}`. In -this way Theano can be used for doing **efficient** symbolic differentiation +the Aesara expression for computing :math:`\frac{\partial s}{\partial w}`. In +this way Aesara can be used for doing **efficient** symbolic differentiation (as the expression returned by ``tt.grad`` will be optimized during compilation), even for function with many inputs. (see `automatic differentiation `_ for a description of symbolic differentiation). @@ -95,15 +95,15 @@ of symbolic differentiation). :ref:`this` section of the library. Additional information on the inner workings of differentiation may also be - found in the more advanced tutorial :ref:`Extending Theano`. + found in the more advanced tutorial :ref:`Extending Aesara`. Computing the Jacobian ====================== -In Theano's parlance, the term *Jacobian* designates the tensor comprising the +In Aesara's parlance, the term *Jacobian* designates the tensor comprising the first partial derivatives of the output of a function with respect to its inputs. (This is a generalization of to the so-called Jacobian matrix in Mathematics.) -Theano implements the :func:`theano.gradient.jacobian` macro that does all +Aesara implements the :func:`aesara.gradient.jacobian` macro that does all that is needed to compute the Jacobian. The following text explains how to do it manually. @@ -114,18 +114,18 @@ do is to loop over the entries in *y* and compute the gradient of .. note:: - ``scan`` is a generic op in Theano that allows writing in a symbolic + ``scan`` is a generic op in Aesara that allows writing in a symbolic manner all kinds of recurrent equations. 
While creating symbolic loops (and optimizing them for performance) is a hard task, effort is being done for improving the performance of ``scan``. We shall return to :ref:`scan` later in this tutorial. ->>> import theano ->>> import theano.tensor as tt +>>> import aesara +>>> import aesara.tensor as tt >>> x = tt.dvector('x') >>> y = x ** 2 ->>> J, updates = theano.scan(lambda i, y, x : tt.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x]) ->>> f = theano.function([x], J, updates=updates) +>>> J, updates = aesara.scan(lambda i, y, x : tt.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x]) +>>> f = aesara.function([x], J, updates=updates) >>> f([4, 4]) array([[ 8., 0.], [ 0., 8.]]) @@ -139,7 +139,7 @@ matrix which corresponds to the Jacobian. .. note:: There are some pitfalls to be aware of regarding ``tt.grad``. One of them is that you cannot re-write the above expression of the Jacobian as - ``theano.scan(lambda y_i,x: tt.grad(y_i,x), sequences=y, + ``aesara.scan(lambda y_i,x: tt.grad(y_i,x), sequences=y, non_sequences=x)``, even though from the documentation of scan this seems possible. The reason is that *y_i* will not be a function of *x* anymore, while *y[i]* still is. @@ -148,9 +148,9 @@ matrix which corresponds to the Jacobian. Computing the Hessian ===================== -In Theano, the term *Hessian* has the usual mathematical meaning: It is the +In Aesara, the term *Hessian* has the usual mathematical meaning: It is the matrix comprising the second order partial derivative of a function with scalar -output and vector input. Theano implements :func:`theano.gradient.hessian` macro that does all +output and vector input. Aesara implements :func:`aesara.gradient.hessian` macro that does all that is needed to compute the Hessian. The following text explains how to do it manually. @@ -163,8 +163,8 @@ scalar. >>> y = x ** 2 >>> cost = y.sum() >>> gy = tt.grad(cost, x) ->>> H, updates = theano.scan(lambda i, gy,x : tt.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x]) ->>> f = theano.function([x], H, updates=updates) +>>> H, updates = aesara.scan(lambda i, gy,x : tt.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x]) +>>> f = aesara.function([x], H, updates=updates) >>> f([4, 4]) array([[ 2., 0.], [ 0., 2.]]) @@ -182,7 +182,7 @@ performance gains. A description of one such algorithm can be found here: * Barak A. Pearlmutter, "Fast Exact Multiplication by the Hessian", *Neural Computation, 1994* -While in principle we would want Theano to identify these patterns automatically for us, +While in principle we would want Aesara to identify these patterns automatically for us, in practice, implementing such optimizations in a generic manner is extremely difficult. Therefore, we provide special functions dedicated to these tasks. @@ -195,7 +195,7 @@ vector, namely :math:`\frac{\partial f(x)}{\partial x} v`. The formulation can be extended even for *x* being a matrix, or a tensor in general, case in which also the Jacobian becomes a tensor and the product becomes some kind of tensor product. Because in practice we end up needing to compute such -expressions in terms of weight matrices, Theano supports this more generic +expressions in terms of weight matrices, Aesara supports this more generic form of the operation. 
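As an aside, the :func:`aesara.gradient.jacobian` and :func:`aesara.gradient.hessian` macros mentioned earlier can be used instead of writing the ``scan`` loop by hand. A minimal sketch, which should reproduce the values of the manual versions above:

.. code-block:: python

    import aesara
    import aesara.tensor as tt
    from aesara.gradient import jacobian, hessian

    x = tt.dvector('x')
    J = jacobian(x ** 2, x)           # Jacobian of a vector expression w.r.t. x
    H = hessian(tt.sum(x ** 2), x)    # Hessian of a scalar cost w.r.t. x
    f = aesara.function([x], [J, H])
    print(f([4, 4]))                  # [[8, 0], [0, 8]] and [[2, 0], [0, 2]]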
In order to evaluate the *R-operation* of expression *y*, with respect to *x*, multiplying the Jacobian with *v* you need to do something similar to this: @@ -204,8 +204,8 @@ you need to do something similar to this: >>> V = tt.dmatrix('V') >>> x = tt.dvector('x') >>> y = tt.dot(x, W) ->>> JV = theano.gradient.Rop(y, W, V) ->>> f = theano.function([W, V, x], JV) +>>> JV = aesara.gradient.Rop(y, W, V) +>>> f = aesara.function([W, V, x], JV) >>> f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1]) array([ 2., 2.]) @@ -223,8 +223,8 @@ f(x)}{\partial x}`. The *L-operator* is also supported for generic tensors >>> v = tt.dvector('v') >>> x = tt.dvector('x') >>> y = tt.dot(x, W) ->>> VJ = theano.gradient.Lop(y, W, v) ->>> f = theano.function([v,x], VJ) +>>> VJ = aesara.gradient.Lop(y, W, v) +>>> f = aesara.function([v,x], VJ) >>> f([2, 2], [0, 1]) array([[ 0., 0.], [ 2., 2.]]) @@ -256,7 +256,7 @@ Hence, we suggest profiling the methods before using either one of the two: >>> y = tt.sum(x ** 2) >>> gy = tt.grad(y, x) >>> vH = tt.grad(tt.sum(gy * v), x) ->>> f = theano.function([x, v], vH) +>>> f = aesara.function([x, v], vH) >>> f([4, 4], [2, 2]) array([ 4., 4.]) @@ -267,8 +267,8 @@ or, making use of the *R-operator*: >>> v = tt.dvector('v') >>> y = tt.sum(x ** 2) >>> gy = tt.grad(y, x) ->>> Hv = theano.gradient.Rop(gy, x, v) ->>> f = theano.function([x, v], Hv) +>>> Hv = aesara.gradient.Rop(gy, x, v) +>>> f = aesara.function([x, v], Hv) >>> f([4, 4], [2, 2]) array([ 4., 4.]) @@ -277,7 +277,7 @@ Final Pointers ============== -* The ``grad`` function works symbolically: it receives and returns Theano variables. +* The ``grad`` function works symbolically: it receives and returns Aesara variables. * ``grad`` can be compared to a macro since it can be applied repeatedly. diff --git a/doc/tutorial/index.txt b/doc/tutorial/index.txt index ef924cb1e7..16bd378ed1 100644 --- a/doc/tutorial/index.txt +++ b/doc/tutorial/index.txt @@ -5,15 +5,15 @@ Tutorial ======== -Let us start an interactive session (e.g. with ``python`` or ``ipython``) and import Theano. +Let us start an interactive session (e.g. with ``python`` or ``ipython``) and import Aesara. ->>> from theano import * +>>> from aesara import * Several of the symbols you will need to use are in the ``tensor`` subpackage -of Theano. Let us import that subpackage under a handy name like +of Aesara. Let us import that subpackage under a handy name like ``tt`` (the tutorials will frequently use this convention). ->>> import theano.tensor as tt +>>> import aesara.tensor as tt If that succeeded you are ready for the tutorial, otherwise check your installation (see :ref:`install`). diff --git a/doc/tutorial/loading_and_saving.txt b/doc/tutorial/loading_and_saving.txt index 7a7d4e2b8d..86e075f42a 100644 --- a/doc/tutorial/loading_and_saving.txt +++ b/doc/tutorial/loading_and_saving.txt @@ -6,7 +6,7 @@ Loading and Saving ================== Python's standard way of saving class instances and reloading them -is the pickle_ mechanism. Many Theano objects can be *serialized* (and +is the pickle_ mechanism. Many Aesara objects can be *serialized* (and *deserialized*) by ``pickle``, however, a limitation of ``pickle`` is that it does not save the code or data of a class along with the instance of the class being serialized. As a result, reloading objects created by a @@ -15,8 +15,8 @@ previous version of a class can be really problematic. Thus, you will want to consider different mechanisms depending on the amount of time you anticipate between saving and reloading. 
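A minimal sketch of two such mechanisms, with the following paragraphs explaining when each is appropriate (the file names are placeholders):

.. code-block:: python

    import pickle
    import numpy as np
    import aesara

    w = aesara.shared(np.zeros(3), name='w')

    # short-lived: pickle the Aesara object itself
    with open('w.pkl', 'wb') as fh:
        pickle.dump(w, fh)

    # longer-lived: save only the underlying values
    np.save('w.npy', w.get_value())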
For short-term (such as temp files and network transfers), pickling of -the Theano objects or classes is possible. For longer-term (such as -saving models from an experiment) you should not rely on pickled Theano +the Aesara objects or classes is possible. For longer-term (such as +saving models from an experiment) you should not rely on pickled Aesara objects; we recommend loading and saving the underlying shared objects as you would in the course of any other Python program. @@ -124,13 +124,13 @@ For instance, you can define functions along the lines of: Robust Serialization ==================== -This type of serialization uses some helper functions particular to Theano. It +This type of serialization uses some helper functions particular to Aesara. It serializes the object using Python's pickling protocol, but any ``ndarray`` or ``CudaNdarray`` objects contained within the object are saved separately as NPY files. These NPY files and the Pickled file are all saved together in single ZIP-file. -The main advantage of this approach is that you don't even need Theano installed +The main advantage of this approach is that you don't even need Aesara installed in order to look at the values of shared variables that you pickled. You can just load the parameters manually with `numpy`. @@ -140,11 +140,11 @@ just load the parameters manually with `numpy`. numpy.load('model.zip') This approach could be beneficial if you are sharing your model with people who -might not have Theano installed, who are using a different Python version, or if +might not have Aesara installed, who are using a different Python version, or if you are planning to save your model for a long time (in which case version mismatches might make it difficult to unpickle objects). -See :func:`theano.misc.pkl_utils.dump` and :func:`theano.misc.pkl_utils.load`. +See :func:`aesara.misc.pkl_utils.dump` and :func:`aesara.misc.pkl_utils.load`. Long-Term Serialization diff --git a/doc/tutorial/loop.txt b/doc/tutorial/loop.txt index a3b81fd1d3..f085e5d004 100644 --- a/doc/tutorial/loop.txt +++ b/doc/tutorial/loop.txt @@ -13,13 +13,13 @@ Scan - You ``scan`` a function along some input sequence, producing an output at each time-step. - The function can see the *previous K time-steps* of your function. - ``sum()`` could be computed by scanning the *z + x(i)* function over a list, given an initial state of *z=0*. -- Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping. +- Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Aesara comes to looping. - Advantages of using ``scan`` over *for* loops: - Number of iterations to be part of the symbolic graph. - Minimizes GPU transfers (if GPU is involved). - Computes gradients through sequential steps. - - Slightly faster than using a *for* loop in Python with a compiled Theano function. + - Slightly faster than using a *for* loop in Python with a compiled Aesara function. - Can lower the overall memory usage by detecting the actual amount of memory needed. The full documentation can be found in the library: :ref:`Scan `. @@ -31,8 +31,8 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # defining the tensor variables @@ -40,13 +40,13 @@ The full documentation can be found in the library: :ref:`Scan `. 
W = tt.matrix("W") b_sym = tt.vector("b_sym") - results, updates = theano.scan(lambda v: tt.tanh(tt.dot(v, W) + b_sym), sequences=X) - compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results) + results, updates = aesara.scan(lambda v: tt.tanh(tt.dot(v, W) + b_sym), sequences=X) + compute_elementwise = aesara.function(inputs=[X, W, b_sym], outputs=results) # test values - x = np.eye(2, dtype=theano.config.floatX) - w = np.ones((2, 2), dtype=theano.config.floatX) - b = np.ones((2), dtype=theano.config.floatX) + x = np.eye(2, dtype=aesara.config.floatX) + w = np.ones((2, 2), dtype=aesara.config.floatX) + b = np.ones((2), dtype=aesara.config.floatX) b[1] = 2 print(compute_elementwise(x, w, b)) @@ -65,8 +65,8 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variables @@ -78,25 +78,25 @@ The full documentation can be found in the library: :ref:`Scan `. V = tt.matrix("V") P = tt.matrix("P") - results, updates = theano.scan(lambda y, p, x_tm1: tt.tanh(tt.dot(x_tm1, W) + tt.dot(y, U) + tt.dot(p, V)), + results, updates = aesara.scan(lambda y, p, x_tm1: tt.tanh(tt.dot(x_tm1, W) + tt.dot(y, U) + tt.dot(p, V)), sequences=[Y, P[::-1]], outputs_info=[X]) - compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=results) + compute_seq = aesara.function(inputs=[X, W, Y, U, P, V], outputs=results) # test values - x = np.zeros((2), dtype=theano.config.floatX) + x = np.zeros((2), dtype=aesara.config.floatX) x[1] = 1 - w = np.ones((2, 2), dtype=theano.config.floatX) - y = np.ones((5, 2), dtype=theano.config.floatX) + w = np.ones((2, 2), dtype=aesara.config.floatX) + y = np.ones((5, 2), dtype=aesara.config.floatX) y[0, :] = -3 - u = np.ones((2, 2), dtype=theano.config.floatX) - p = np.ones((5, 2), dtype=theano.config.floatX) + u = np.ones((2, 2), dtype=aesara.config.floatX) + p = np.ones((5, 2), dtype=aesara.config.floatX) p[0, :] = 3 - v = np.ones((2, 2), dtype=theano.config.floatX) + v = np.ones((2, 2), dtype=aesara.config.floatX) print(compute_seq(x, w, y, u, p, v)) # comparison with numpy - x_res = np.zeros((5, 2), dtype=theano.config.floatX) + x_res = np.zeros((5, 2), dtype=aesara.config.floatX) x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v)) for i in range(1, 5): x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v)) @@ -119,17 +119,17 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variable X = tt.matrix("X") - results, updates = theano.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X]) - compute_norm_lines = theano.function(inputs=[X], outputs=results) + results, updates = aesara.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X]) + compute_norm_lines = aesara.function(inputs=[X], outputs=results) # test value - x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1) + x = np.diag(np.arange(1, 6, dtype=aesara.config.floatX), 1) print(compute_norm_lines(x)) # comparison with numpy @@ -144,17 +144,17 @@ The full documentation can be found in the library: :ref:`Scan `. .. 
testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variable X = tt.matrix("X") - results, updates = theano.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X.T]) - compute_norm_cols = theano.function(inputs=[X], outputs=results) + results, updates = aesara.scan(lambda x_i: tt.sqrt((x_i ** 2).sum()), sequences=[X.T]) + compute_norm_cols = aesara.function(inputs=[X], outputs=results) # test value - x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1) + x = np.diag(np.arange(1, 6, dtype=aesara.config.floatX), 1) print(compute_norm_cols(x)) # comparison with numpy @@ -169,22 +169,22 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np floatX = "float32" # define tensor variable X = tt.matrix("X") - results, updates = theano.scan(lambda i, j, t_f: tt.cast(X[i, j] + t_f, floatX), + results, updates = aesara.scan(lambda i, j, t_f: tt.cast(X[i, j] + t_f, floatX), sequences=[tt.arange(X.shape[0]), tt.arange(X.shape[1])], outputs_info=np.asarray(0., dtype=floatX)) result = results[-1] - compute_trace = theano.function(inputs=[X], outputs=result) + compute_trace = aesara.function(inputs=[X], outputs=result) # test value - x = np.eye(5, dtype=theano.config.floatX) - x[0] = np.arange(5, dtype=theano.config.floatX) + x = np.eye(5, dtype=aesara.config.floatX) + x[0] = np.arange(5, dtype=aesara.config.floatX) print(compute_trace(x)) # comparison with numpy @@ -200,8 +200,8 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variables @@ -212,18 +212,18 @@ The full documentation can be found in the library: :ref:`Scan `. V = tt.matrix("V") n_sym = tt.iscalar("n_sym") - results, updates = theano.scan(lambda x_tm2, x_tm1: tt.dot(x_tm2, U) + tt.dot(x_tm1, V) + tt.tanh(tt.dot(x_tm1, W) + b_sym), + results, updates = aesara.scan(lambda x_tm2, x_tm1: tt.dot(x_tm2, U) + tt.dot(x_tm1, V) + tt.tanh(tt.dot(x_tm1, W) + b_sym), n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])]) - compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results) + compute_seq2 = aesara.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results) # test values - x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2] + x = np.zeros((2, 2), dtype=aesara.config.floatX) # the initial value must be able to return x[-2] x[1, 1] = 1 - w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX) - u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX)) - v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX) + w = 0.5 * np.ones((2, 2), dtype=aesara.config.floatX) + u = 0.5 * (np.ones((2, 2), dtype=aesara.config.floatX) - np.eye(2, dtype=aesara.config.floatX)) + v = 0.5 * np.ones((2, 2), dtype=aesara.config.floatX) n = 10 - b = np.ones((2), dtype=theano.config.floatX) + b = np.ones((2), dtype=aesara.config.floatX) print(compute_seq2(x, u, v, w, b, n)) @@ -265,21 +265,21 @@ The full documentation can be found in the library: :ref:`Scan `. .. 
testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variables v = tt.vector() A = tt.matrix() y = tt.tanh(tt.dot(v, A)) - results, updates = theano.scan(lambda i: tt.grad(y[i], v), sequences=[tt.arange(y.shape[0])]) - compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in) + results, updates = aesara.scan(lambda i: tt.grad(y[i], v), sequences=[tt.arange(y.shape[0])]) + compute_jac_t = aesara.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in) # test values - x = np.eye(5, dtype=theano.config.floatX)[0] - w = np.eye(5, 3, dtype=theano.config.floatX) - w[2] = np.ones((3), dtype=theano.config.floatX) + x = np.eye(5, dtype=aesara.config.floatX)[0] + w = np.eye(5, 3, dtype=aesara.config.floatX) + w[2] = np.ones((3), dtype=aesara.config.floatX) print(compute_jac_t(w, x)) # compare with numpy @@ -300,16 +300,16 @@ Note that we need to iterate over the indices of ``y`` and not over the elements .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define shared variables - k = theano.shared(0) + k = aesara.shared(0) n_sym = tt.iscalar("n_sym") - results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym) - accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True) + results, updates = aesara.scan(lambda:{k:(k + 1)}, n_steps=n_sym) + accumulator = aesara.function([n_sym], [], updates=updates, allow_input_downcast=True) k.get_value() accumulator(5) @@ -319,8 +319,8 @@ Note that we need to iterate over the indices of ``y`` and not over the elements .. testcode:: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt import numpy as np # define tensor variables @@ -329,15 +329,15 @@ Note that we need to iterate over the indices of ``y`` and not over the elements b_sym = tt.vector("b_sym") # define shared random stream - trng = theano.tensor.random.utils.RandomStream(1234) + trng = aesara.tensor.random.utils.RandomStream(1234) d=trng.binomial(size=W[1].shape) - results, updates = theano.scan(lambda v: tt.tanh(tt.dot(v, W) + b_sym) * d, sequences=X) - compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results, + results, updates = aesara.scan(lambda v: tt.tanh(tt.dot(v, W) + b_sym) * d, sequences=X) + compute_with_bnoise = aesara.function(inputs=[X, W, b_sym], outputs=results, updates=updates, allow_input_downcast=True) - x = np.eye(10, 2, dtype=theano.config.floatX) - w = np.ones((2, 2), dtype=theano.config.floatX) - b = np.ones((2), dtype=theano.config.floatX) + x = np.eye(10, 2, dtype=aesara.config.floatX) + w = np.ones((2, 2), dtype=aesara.config.floatX) + b = np.ones((2), dtype=aesara.config.floatX) print(compute_with_bnoise(x, w, b)) @@ -360,9 +360,9 @@ Note that if you want to use a random variable ``d`` that will not be updated th .. 
testcode:: - import theano - import theano.tensor as tt - theano.config.warn__subtensor_merge_bug = False + import aesara + import aesara.tensor as tt + aesara.config.warn__subtensor_merge_bug = False k = tt.iscalar("k") A = tt.vector("A") @@ -371,7 +371,7 @@ Note that if you want to use a random variable ``d`` that will not be updated th return prior_result * B # Symbolic description of the result - result, updates = theano.scan(fn=inner_fct, + result, updates = aesara.scan(fn=inner_fct, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k) @@ -379,7 +379,7 @@ Note that if you want to use a random variable ``d`` that will not be updated th # value. Scan notices this and does not waste memory saving them. final_result = result[-1] - power = theano.function(inputs=[A, k], outputs=final_result, + power = aesara.function(inputs=[A, k], outputs=final_result, updates=updates) print(power(range(10), 2)) @@ -394,24 +394,24 @@ Note that if you want to use a random variable ``d`` that will not be updated th .. testcode:: import numpy - import theano - import theano.tensor as tt - theano.config.warn__subtensor_merge_bug = False + import aesara + import aesara.tensor as tt + aesara.config.warn__subtensor_merge_bug = False - coefficients = theano.tensor.vector("coefficients") + coefficients = aesara.tensor.vector("coefficients") x = tt.scalar("x") max_coefficients_supported = 10000 # Generate the components of the polynomial - full_range=theano.tensor.arange(max_coefficients_supported) - components, updates = theano.scan(fn=lambda coeff, power, free_var: + full_range=aesara.tensor.arange(max_coefficients_supported) + components, updates = aesara.scan(fn=lambda coeff, power, free_var: coeff * (free_var ** power), outputs_info=None, sequences=[coefficients, full_range], non_sequences=x) polynomial = components.sum() - calculate_polynomial = theano.function(inputs=[coefficients, x], + calculate_polynomial = aesara.function(inputs=[coefficients, x], outputs=polynomial) test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32) diff --git a/doc/tutorial/loop_solution_1.py b/doc/tutorial/loop_solution_1.py index d3c791fa73..3faccae51c 100755 --- a/doc/tutorial/loop_solution_1.py +++ b/doc/tutorial/loop_solution_1.py @@ -1,16 +1,16 @@ #!/usr/bin/env python -# Theano tutorial +# Aesara tutorial # Solution to Exercise in section 'Loop' import numpy as np -import theano -import theano.tensor as tt +import aesara +import aesara.tensor as tt # 1. First example -theano.config.warn__subtensor_merge_bug = False +aesara.config.warn__subtensor_merge_bug = False k = tt.iscalar("k") A = tt.vector("A") @@ -20,7 +20,7 @@ def inner_fct(prior_result, A): return prior_result * A # Symbolic description of the result -result, updates = theano.scan(fn=inner_fct, +result, updates = aesara.scan(fn=inner_fct, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k) @@ -28,7 +28,7 @@ def inner_fct(prior_result, A): # value. Scan notices this and does not waste memory saving them. 
final_result = result[-1] -power = theano.function(inputs=[A, k], outputs=final_result, +power = aesara.function(inputs=[A, k], outputs=final_result, updates=updates) print(power(list(range(10)), 2)) @@ -43,13 +43,13 @@ def inner_fct(prior_result, A): # Generate the components of the polynomial full_range = tt.arange(max_coefficients_supported) -components, updates = theano.scan(fn=lambda coeff, power, free_var: +components, updates = aesara.scan(fn=lambda coeff, power, free_var: coeff * (free_var ** power), sequences=[coefficients, full_range], outputs_info=None, non_sequences=x) polynomial = components.sum() -calculate_polynomial1 = theano.function(inputs=[coefficients, x], +calculate_polynomial1 = aesara.function(inputs=[coefficients, x], outputs=polynomial) test_coeff = np.asarray([1, 0, 2], dtype=np.float32) @@ -58,7 +58,7 @@ def inner_fct(prior_result, A): # 3. Reduction performed inside scan -theano.config.warn__subtensor_merge_bug = False +aesara.config.warn__subtensor_merge_bug = False coefficients = tt.vector("coefficients") x = tt.scalar("x") @@ -70,14 +70,14 @@ def inner_fct(prior_result, A): outputs_info = tt.as_tensor_variable(np.asarray(0, 'float64')) -components, updates = theano.scan(fn=lambda coeff, power, prior_value, free_var: +components, updates = aesara.scan(fn=lambda coeff, power, prior_value, free_var: prior_value + (coeff * (free_var ** power)), sequences=[coefficients, full_range], outputs_info=outputs_info, non_sequences=x) polynomial = components[-1] -calculate_polynomial = theano.function(inputs=[coefficients, x], +calculate_polynomial = aesara.function(inputs=[coefficients, x], outputs=polynomial, updates=updates) test_coeff = np.asarray([1, 0, 2], dtype=np.float32) diff --git a/doc/tutorial/modes.txt b/doc/tutorial/modes.txt index e46f54c776..577c330468 100644 --- a/doc/tutorial/modes.txt +++ b/doc/tutorial/modes.txt @@ -9,29 +9,29 @@ Configuration Settings and Compiling Modes Configuration ============= -The ``config`` module contains several *attributes* that modify Theano's behavior. Many of these -attributes are examined during the import of the ``theano`` module and several are assumed to be +The ``config`` module contains several *attributes* that modify Aesara's behavior. Many of these +attributes are examined during the import of the ``aesara`` module and several are assumed to be read-only. *As a rule, the attributes in the* ``config`` *module should not be modified inside the user code.* -Theano's code comes with default values for these attributes, but you can -override them from your ``.theanorc`` file, and override those values in turn by -the :envvar:`THEANO_FLAGS` environment variable. +Aesara's code comes with default values for these attributes, but you can +override them from your ``.aesararc`` file, and override those values in turn by +the :envvar:`AESARA_FLAGS` environment variable. The order of precedence is: -1. an assignment to theano.config. -2. an assignment in :envvar:`THEANO_FLAGS` -3. an assignment in the .theanorc file (or the file indicated in :envvar:`THEANORC`) +1. an assignment to aesara.config. +2. an assignment in :envvar:`AESARA_FLAGS` +3. an assignment in the .aesararc file (or the file indicated in :envvar:`AESARARC`) You can display the current/effective configuration at any time by printing -theano.config. For example, to see a list of all active configuration +aesara.config. For example, to see a list of all active configuration variables, type this from the command-line: .. 
code-block:: bash - python -c 'import theano; print(theano.config)' | less + python -c 'import aesara; print(aesara.config)' | less For more detail, see :ref:`Configuration ` in the library. @@ -46,25 +46,25 @@ Consider the logistic regression: .. testcode:: import numpy - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as tt rng = numpy.random N = 400 feats = 784 - D = (rng.randn(N, feats).astype(theano.config.floatX), - rng.randint(size=N,low=0, high=2).astype(theano.config.floatX)) + D = (rng.randn(N, feats).astype(aesara.config.floatX), + rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX)) training_steps = 10000 - # Declare Theano symbolic variables + # Declare Aesara symbolic variables x = tt.matrix("x") y = tt.vector("y") - w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w") - b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b") + w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") + b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b") x.tag.test_value = D[0] y.tag.test_value = D[1] - # Construct Theano expression graph + # Construct Aesara expression graph p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 xent = -y*tt.log(p_1) - (1-y)*tt.log(1-p_1) # Cross-entropy @@ -72,12 +72,12 @@ Consider the logistic regression: gw,gb = tt.grad(cost, [w,b]) # Compile expressions to functions - train = theano.function( + train = aesara.function( inputs=[x,y], outputs=[prediction, xent], updates=[(w, w-0.01*gw), (b, b-0.01*gb)], name = "train") - predict = theano.function(inputs=[x], outputs=prediction, + predict = aesara.function(inputs=[x], outputs=prediction, name = "predict") if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in @@ -87,7 +87,7 @@ Consider the logistic regression: train.maker.fgraph.toposort()]): print('Used the gpu') else: - print('ERROR, not able to tell if theano used the cpu or the gpu') + print('ERROR, not able to tell if aesara used the cpu or the gpu') print(train.maker.fgraph.toposort()) for i in range(training_steps): @@ -115,7 +115,7 @@ as it will be useful later on. .. Note:: - * Apply the Theano flag ``floatX=float32`` (through ``theano.config.floatX``) in your code. + * Apply the Aesara flag ``floatX=float32`` (through ``aesara.config.floatX``) in your code. * Cast inputs before storing them into a shared variable. * Circumvent the automatic cast of *int32* with *float32* to *float64*: @@ -130,12 +130,12 @@ as it will be useful later on. Mode ==== -Every time :func:`theano.function ` is called, -the symbolic relationships between the input and output Theano *variables* +Every time :func:`aesara.function ` is called, +the symbolic relationships between the input and output Aesara *variables* are optimized and compiled. The way this compilation occurs is controlled by the value of the ``mode`` parameter. -Theano defines the following modes by name: +Aesara defines the following modes by name: - ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations. So GPU is disabled. - ``'FAST_RUN'``: Apply all optimizations and use C implementations where possible. 
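
For example, here is a minimal sketch, using only the ``mode`` keyword documented above, of compiling the same toy graph under two of the named modes (the graph and variable names are illustrative, not taken from the tutorial):

.. code-block:: python

    import aesara
    import aesara.tensor as tt

    x = tt.dvector("x")
    y = (x ** 2).sum()

    # 'FAST_COMPILE' applies only a few graph optimizations and uses the
    # Python implementations; 'FAST_RUN' applies all optimizations and uses
    # C implementations where possible.
    f_compile = aesara.function([x], y, mode="FAST_COMPILE")
    f_run = aesara.function([x], y, mode="FAST_RUN")

    print(f_compile([1.0, 2.0, 3.0]))  # 14.0
    print(f_run([1.0, 2.0, 3.0]))      # 14.0, from the more optimized graph
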
@@ -147,7 +147,7 @@ Theano defines the following modes by name: The default mode is typically ``FAST_RUN``, but it can be controlled via the configuration variable :attr:`config.mode`, which can be overridden by passing the keyword argument to -:func:`theano.function `. +:func:`aesara.function `. ================= =============================================================== =============================================================================== short name Full constructor What does it do? @@ -171,7 +171,7 @@ A mode is composed of 2 things: an optimizer and a linker. Some modes, like ``NanGuardMode`` and ``DebugMode``, add logic around the optimizer and linker. ``DebugMode`` uses its own linker. -You can select which linker to use with the Theano flag :attr:`config.linker`. +You can select which linker to use with the Aesara flag :attr:`config.linker`. Here is a table to compare the different linkers. ============= ========= ================= ========= === @@ -184,13 +184,13 @@ c|py_nogc no yes "++" As c|py, but without gc c no yes "+" Use only C code (if none available for an op, raise an error) py yes yes "+++" Use only Python code NanGuardMode yes yes "++++" Check if nodes generate NaN -DebugMode no yes VERY HIGH Make many checks on what Theano computes +DebugMode no yes VERY HIGH Make many checks on what Aesara computes ============= ========= ================= ========= === .. [#gc] Garbage collection of intermediate results during computation. Otherwise, their memory space used by the ops is kept between - Theano function calls, in order not to + Aesara function calls, in order not to reallocate memory, and lower the overhead (make it faster...). .. [#cpy1] Default @@ -202,20 +202,20 @@ For more detail, see :ref:`Mode` in the library. Optimizers ========== -Theano allows compilations with a number of predefined optimizers. +Aesara allows compilations with a number of predefined optimizers. An optimizer consists of a particular set of optimizations, that speed -up execution of Theano programs. +up execution of Aesara programs. -The optimizers Theano provides are summarized below to indicate the trade-offs +The optimizers Aesara provides are summarized below to indicate the trade-offs one might make between compilation time and execution time. -These optimizers can be enabled globally with the Theano flag: ``optimizer=name`` -or per call to theano functions with ``function(...mode=Mode(optimizer="name"))``. +These optimizers can be enabled globally with the Aesara flag: ``optimizer=name`` +or per call to aesara functions with ``function(...mode=Mode(optimizer="name"))``. ================= ============ ============== ================================================== optimizer Compile time Execution time Description ================= ============ ============== ================================================== -None "++++++" "+" Applies none of Theano's opts +None "++++++" "+" Applies none of Aesara's opts o1 (fast_compile) "+++++" "++" Applies only basic opts o2 "++++" "+++" Applies few basic opts and some that compile fast o3 "+++" "++++" Applies all opts except ones that compile slower @@ -226,7 +226,7 @@ stabilize "+++++" "++" Only applies stability opts For a detailed list of the specific optimizations applied for each of these optimizers, see :ref:`optimizations`. Also, see :ref:`unsafe_optimization` and -:ref:`faster-theano-function-compilation` for other trade-off. +:ref:`faster-aesara-function-compilation` for other trade-off. .. 
_using_debugmode: @@ -254,7 +254,7 @@ DebugMode is used as follows: x = tt.dvector('x') - f = theano.function([x], 10 * x, mode='DebugMode') + f = aesara.function([x], 10 * x, mode='DebugMode') f([5]) f([0]) @@ -263,8 +263,8 @@ DebugMode is used as follows: If any problem is detected, DebugMode will raise an exception according to what went wrong, either at call time (*f(5)*) or compile time ( -``f = theano.function(x, 10 * x, mode='DebugMode')``). These exceptions -should *not* be ignored; talk to your local Theano guru or email the +``f = aesara.function(x, 10 * x, mode='DebugMode')``). These exceptions +should *not* be ignored; talk to your local Aesara guru or email the users list if you cannot make the exception go away. Some kinds of errors can only be detected for certain input value combinations. diff --git a/doc/tutorial/modes_solution_1.py b/doc/tutorial/modes_solution_1.py index 87bee7c357..bcf9dcdb29 100755 --- a/doc/tutorial/modes_solution_1.py +++ b/doc/tutorial/modes_solution_1.py @@ -1,33 +1,33 @@ #!/usr/bin/env python -# Theano tutorial +# Aesara tutorial # Solution to Exercise in section 'Configuration Settings and Compiling Modes' import numpy as np -import theano -import theano.tensor as tt +import aesara +import aesara.tensor as tt -theano.config.floatX = 'float32' +aesara.config.floatX = 'float32' rng = np.random N = 400 feats = 784 -D = (rng.randn(N, feats).astype(theano.config.floatX), -rng.randint(size=N, low=0, high=2).astype(theano.config.floatX)) +D = (rng.randn(N, feats).astype(aesara.config.floatX), +rng.randint(size=N, low=0, high=2).astype(aesara.config.floatX)) training_steps = 10000 -# Declare Theano symbolic variables +# Declare Aesara symbolic variables x = tt.matrix("x") y = tt.vector("y") -w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w") -b = theano.shared(np.asarray(0., dtype=theano.config.floatX), name="b") +w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") +b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b") x.tag.test_value = D[0] y.tag.test_value = D[1] #print "Initial model:" #print w.get_value(), b.get_value() -# Construct Theano expression graph +# Construct Aesara expression graph p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability of having a one prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) # Cross-entropy @@ -36,12 +36,12 @@ gw, gb = tt.grad(cost, [w, b]) # Compile expressions to functions -train = theano.function( +train = aesara.function( inputs=[x, y], outputs=[prediction, xent], updates={w: w - 0.01 * gw, b: b - 0.01 * gb}, name="train") -predict = theano.function(inputs=[x], outputs=prediction, +predict = aesara.function(inputs=[x], outputs=prediction, name="predict") if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in @@ -51,7 +51,7 @@ train.maker.fgraph.toposort()]): print('Used the gpu') else: - print('ERROR, not able to tell if theano used the cpu or the gpu') + print('ERROR, not able to tell if aesara used the cpu or the gpu') print(train.maker.fgraph.toposort()) for i in range(training_steps): diff --git a/doc/tutorial/multi_cores.txt b/doc/tutorial/multi_cores.txt index 9bec93b5d4..c609ef86f6 100644 --- a/doc/tutorial/multi_cores.txt +++ b/doc/tutorial/multi_cores.txt @@ -1,14 +1,13 @@ .. 
_tut_multi_cores: ============================= -Multi cores support in Theano +Multi cores support in Aesara ============================= Convolution and Pooling ======================= -Since Theano 0.9dev2, the convolution and pooling are parallelized on -CPU. +The convolution and pooling are parallelized on CPU. BLAS operation @@ -20,19 +19,19 @@ between vector/matrix and matrix/matrix). Many different implementations of that interface exist and some of them are parallelized. -Theano tries to use that interface as frequently as possible for -performance reasons. So if Theano links to a parallel implementation, -those operations will run in parallel in Theano. +Aesara tries to use that interface as frequently as possible for +performance reasons. So if Aesara links to a parallel implementation, +those operations will run in parallel in Aesara. The most frequent way to control the number of threads used is via the ``OMP_NUM_THREADS`` environment variable. Set it to the number of threads you want to use before starting the Python process. Some BLAS implementations support other environment variables. -To test if you BLAS supports OpenMP/Multiple cores, you can use the theano/misc/check_blas.py script from the command line like this:: +To test if you BLAS supports OpenMP/Multiple cores, you can use the aesara/misc/check_blas.py script from the command line like this:: - OMP_NUM_THREADS=1 python theano/misc/check_blas.py -q - OMP_NUM_THREADS=2 python theano/misc/check_blas.py -q + OMP_NUM_THREADS=1 python aesara/misc/check_blas.py -q + OMP_NUM_THREADS=2 python aesara/misc/check_blas.py -q @@ -54,7 +53,7 @@ For simple (fast) operations you can obtain a speed-up with very large tensors while for more complex operations you can obtain a good speed-up also for smaller tensors. -There is a script ``elemwise_openmp_speedup.py`` in ``theano/misc/`` +There is a script ``elemwise_openmp_speedup.py`` in ``aesara/misc/`` which you can use to tune the value of ``openmp_elemwise_minsize`` for your machine. The script runs two elemwise operations (a fast one and a slow one) for a vector of size ``openmp_elemwise_minsize`` with and @@ -66,7 +65,7 @@ threads you want to use before starting the Python process. You can test this with this command:: - OMP_NUM_THREADS=2 python theano/misc/elemwise_openmp_speedup.py + OMP_NUM_THREADS=2 python aesara/misc/elemwise_openmp_speedup.py #The output Fast op time without openmp 0.000533s with openmp 0.000474s speedup 1.12 diff --git a/doc/tutorial/nan_tutorial.txt b/doc/tutorial/nan_tutorial.txt index de9c25a4fd..4059059eee 100644 --- a/doc/tutorial/nan_tutorial.txt +++ b/doc/tutorial/nan_tutorial.txt @@ -11,7 +11,7 @@ because sometimes it is caused by a bug or error in the code, sometimes it's because of the numerical stability of your computational environment (library versions, etc.), and even, sometimes it relates to your algorithm. Here we try to outline common issues which cause the model to yield NaNs, as well as -provide nails and hammers to diagnose it. +provide nails and hammers to diagnose it. Check Superparameters and Weight Initialization @@ -30,26 +30,26 @@ two) training epochs each to see if the NaNs could disappear. Some models can be very sensitive to the initialization of weight vectors. If those weights are not initialized in a proper range, then it is not surprising -that the model ends up with yielding NaNs. +that the model ends up with yielding NaNs. 
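
As a hedged illustration of the two points above (a step size that is far too large, and weights whose scale lets the model saturate), the toy logistic-regression sketch below is not part of the tutorial itself and only reuses constructs shown elsewhere in it; with such a learning rate the sigmoid saturates and the cross-entropy typically turns into inf/NaN within a few updates:

.. code-block:: python

    import numpy as np
    import aesara
    import aesara.tensor as tt

    x = tt.matrix("x")
    y = tt.vector("y")
    w = aesara.shared(np.random.randn(5).astype(aesara.config.floatX), name="w")

    p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)))                  # sigmoid
    xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)    # cross-entropy
    cost = xent.mean()
    gw = tt.grad(cost, w)

    lr = 1e6  # deliberately far too large
    train = aesara.function([x, y], cost, updates=[(w, w - lr * gw)])

    data_x = np.random.randn(20, 5).astype(aesara.config.floatX)
    data_y = np.random.randint(0, 2, size=20).astype(aesara.config.floatX)
    for i in range(10):
        print(i, train(data_x, data_y))
    # The weights explode, p_1 saturates to exactly 0.0 or 1.0 in floating
    # point, and log(0) then produces the first inf/NaN values.
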
Run in NanGuardMode, DebugMode, or MonitorMode ----------------------------------------------- If adjusting hyperparameters doesn't work for you, you can still get help from -Theano's NanGuardMode. Change the mode of your theano function to NanGuardMode +Aesara's NanGuardMode. Change the mode of your aesara function to NanGuardMode and run them again. The NanGuardMode will monitor all input/output variables in each node, and raises an error if NaNs are detected. For how to use the -``NanGuardMode``, please refer to :ref:`nanguardmode`. Using ``optimizer_including=alloc_empty_to_zeros`` -with ``NanGuardMode`` could be helpful to detect NaN, for more information please refer -to :ref:`AllocEmpty`. +``NanGuardMode``, please refer to :ref:`nanguardmode`. Using ``optimizer_including=alloc_empty_to_zeros`` +with ``NanGuardMode`` could be helpful to detect NaN, for more information please refer +to :ref:`AllocEmpty`. DebugMode can also help. Run your code in DebugMode with flag ``mode=DebugMode,DebugMode__check_py=False``. This will give you clue about which op is causing this problem, and then you can inspect that op in more detail. For details of using ``DebugMode``, please refer to :ref:`debugmode`. -Theano's MonitorMode provides another helping hand. It can be used to step +Aesara's MonitorMode provides another helping hand. It can be used to step through the execution of a function. You can inspect the inputs and outputs of each node being executed when the function is called. For how to use that, please check :ref:`faq_monitormode`. @@ -76,7 +76,7 @@ and find out if everything is derived correctly. CUDA Specific Option -------------------- -The Theano flag ``nvcc.fastmath=True`` can genarate NaN. Don't set +The Aesara flag ``nvcc.fastmath=True`` can genarate NaN. Don't set this flag while debugging NaN. @@ -87,4 +87,4 @@ NaN Introduced by AllocEmpty AllocEmpty is used by many operation such as scan to allocate some memory without properly clearing it. The reason for that is that the allocated memory will subsequently be overwritten. However, this can sometimes introduce NaN depending on the operation and what was previously stored in the memory it is working on. For instance, trying to zero out memory using a multiplication before applying an operation could cause NaN if NaN is already present in the memory, since `0 * NaN => NaN`. -Using ``optimizer_including=alloc_empty_to_zeros`` replaces `AllocEmpty` by `Alloc{0}`, which is helpful to diagnose where NaNs come from. Please note that when running in `NanGuardMode`, this optimizer is not included by default. Therefore, it might be helpful to use them both together. +Using ``optimizer_including=alloc_empty_to_zeros`` replaces `AllocEmpty` by `Alloc{0}`, which is helpful to diagnose where NaNs come from. Please note that when running in `NanGuardMode`, this optimizer is not included by default. Therefore, it might be helpful to use them both together. diff --git a/doc/tutorial/printing_drawing.txt b/doc/tutorial/printing_drawing.txt index 4a0dbd134f..7ce13e221e 100644 --- a/doc/tutorial/printing_drawing.txt +++ b/doc/tutorial/printing_drawing.txt @@ -2,23 +2,23 @@ .. 
_tutorial_printing_drawing: ============================== -Printing/Drawing Theano graphs +Printing/Drawing Aesara graphs ============================== -Theano provides the functions :func:`theano.printing.pprint` and -:func:`theano.printing.debugprint` to print a graph to the terminal before or +Aesara provides the functions :func:`aesara.printing.pprint` and +:func:`aesara.printing.debugprint` to print a graph to the terminal before or after compilation. :func:`pprint` is more compact and math-like, -:func:`debugprint` is more verbose. Theano also provides :func:`pydotprint` +:func:`debugprint` is more verbose. Aesara also provides :func:`pydotprint` that creates an image of the function. You can read about them in :ref:`libdoc_printing`. .. note:: - When printing Theano functions, they can sometimes be hard to - read. To help with this, you can disable some Theano optimizations - by using the Theano flag: + When printing Aesara functions, they can sometimes be hard to + read. To help with this, you can disable some Aesara optimizations + by using the Aesara flag: ``optimizer_excluding=fusion:inplace``. Do not use this during real job execution, as this will make the graph slower and use more memory. @@ -26,22 +26,22 @@ that creates an image of the function. You can read about them in Consider again the logistic regression example: >>> import numpy ->>> import theano ->>> import theano.tensor as tt +>>> import aesara +>>> import aesara.tensor as tt >>> rng = numpy.random >>> # Training data >>> N = 400 >>> feats = 784 ->>> D = (rng.randn(N, feats).astype(theano.config.floatX), rng.randint(size=N,low=0, high=2).astype(theano.config.floatX)) +>>> D = (rng.randn(N, feats).astype(aesara.config.floatX), rng.randint(size=N,low=0, high=2).astype(aesara.config.floatX)) >>> training_steps = 10000 ->>> # Declare Theano symbolic variables +>>> # Declare Aesara symbolic variables >>> x = tt.matrix("x") >>> y = tt.vector("y") ->>> w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w") ->>> b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b") +>>> w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w") +>>> b = aesara.shared(numpy.asarray(0., dtype=aesara.config.floatX), name="b") >>> x.tag.test_value = D[0] >>> y.tag.test_value = D[1] ->>> # Construct Theano expression graph +>>> # Construct Aesara expression graph >>> p_1 = 1 / (1 + tt.exp(-tt.dot(x, w)-b)) # Probability of having a one >>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 >>> # Compute gradients @@ -49,14 +49,14 @@ Consider again the logistic regression example: >>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize >>> gw,gb = tt.grad(cost, [w,b]) >>> # Training and prediction function ->>> train = theano.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train") ->>> predict = theano.function(inputs=[x], outputs=prediction, name = "predict") +>>> train = aesara.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train") +>>> predict = aesara.function(inputs=[x], outputs=prediction, name = "predict") Pretty Printing =============== ->>> theano.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE +>>> aesara.printing.pprint(prediction) # doctest: +NORMALIZE_WHITESPACE 'gt((TensorConstant{1} / (TensorConstant{1} + exp(((-(x \\dot w)) - b)))), TensorConstant{0.5})' @@ -66,7 +66,7 @@ Debug Print The pre-compilation graph: ->>> 
theano.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS +>>> aesara.printing.debugprint(prediction) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Elemwise{gt,no_inplace} [id A] '' |Elemwise{true_div,no_inplace} [id B] '' | |InplaceDimShuffle{x} [id C] '' @@ -87,7 +87,7 @@ Elemwise{gt,no_inplace} [id A] '' The post-compilation graph: ->>> theano.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS +>>> aesara.printing.debugprint(predict) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Elemwise{Composite{GT(scalar_sigmoid((-((-i0) - i1))), i2)}} [id A] '' 4 |...Gemv{inplace} [id B] '' 3 | |AllocEmpty{dtype='float64'} [id C] '' 2 @@ -107,7 +107,7 @@ Picture Printing of Graphs The pre-compilation graph: ->>> theano.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP +>>> aesara.printing.pydotprint(prediction, outfile="pics/logreg_pydotprint_prediction.png", var_with_name_simple=True) # doctest: +SKIP The output file is available at pics/logreg_pydotprint_prediction.png .. image:: ./pics/logreg_pydotprint_prediction.png @@ -115,7 +115,7 @@ The output file is available at pics/logreg_pydotprint_prediction.png The post-compilation graph: ->>> theano.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP +>>> aesara.printing.pydotprint(predict, outfile="pics/logreg_pydotprint_predict.png", var_with_name_simple=True) # doctest: +SKIP The output file is available at pics/logreg_pydotprint_predict.png .. image:: ./pics/logreg_pydotprint_predict.png @@ -123,7 +123,7 @@ The output file is available at pics/logreg_pydotprint_predict.png The optimized training graph: ->>> theano.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP +>>> aesara.printing.pydotprint(train, outfile="pics/logreg_pydotprint_train.png", var_with_name_simple=True) # doctest: +SKIP The output file is available at pics/logreg_pydotprint_train.png .. image:: ./pics/logreg_pydotprint_train.png @@ -133,7 +133,7 @@ The output file is available at pics/logreg_pydotprint_train.png Interactive Graph Visualization =============================== -The new :mod:`d3viz` module complements :func:`theano.printing.pydotprint` to +The new :mod:`d3viz` module complements :func:`aesara.printing.pydotprint` to visualize complex graph structures. Instead of creating a static image, it generates an HTML file, which allows to dynamically inspect graph structures in a web browser. Features include zooming, drag-and-drop, editing node labels, or diff --git a/doc/tutorial/profiling.txt b/doc/tutorial/profiling.txt index be388a9984..36f4cafe53 100644 --- a/doc/tutorial/profiling.txt +++ b/doc/tutorial/profiling.txt @@ -2,7 +2,7 @@ .. _tut_profiling: ========================= -Profiling Theano function +Profiling Aesara function ========================= .. note:: @@ -17,43 +17,43 @@ You can profile your functions using either of the following two options: -1. Use Theano flag :attr:`config.profile` to enable profiling. - - To enable the memory profiler use the Theano flag: +1. Use Aesara flag :attr:`config.profile` to enable profiling. + - To enable the memory profiler use the Aesara flag: :attr:`config.profile_memory` in addition to :attr:`config.profile`. 
- - Moreover, to enable the profiling of Theano optimization phase, - use the Theano flag: :attr:`config.profile_optimizer` in addition + - Moreover, to enable the profiling of Aesara optimization phase, + use the Aesara flag: :attr:`config.profile_optimizer` in addition to :attr:`config.profile`. - - You can also use the Theano flags :attr:`profiling__n_apply`, + - You can also use the Aesara flags :attr:`profiling__n_apply`, :attr:`profiling__n_ops` and :attr:`profiling__min_memory_size` to modify the quantity of information printed. -2. Pass the argument :attr:`profile=True` to the function :func:`theano.function `. And then call :attr:`f.profile.summary()` for a single function. +2. Pass the argument :attr:`profile=True` to the function :func:`aesara.function `. And then call :attr:`f.profile.summary()` for a single function. - Use this option when you want to profile not all the functions but one or more specific function(s). - - You can also combine the profile of many functions: - + - You can also combine the profile of many functions: + .. doctest:: :hide: - profile = theano.compile.ProfileStats() - f = theano.function(..., profile=profile) # doctest: +SKIP - g = theano.function(..., profile=profile) # doctest: +SKIP + profile = aesara.compile.ProfileStats() + f = aesara.function(..., profile=profile) # doctest: +SKIP + g = aesara.function(..., profile=profile) # doctest: +SKIP ... # doctest: +SKIP profile.summary() -The profiler will output one profile per Theano function and profile +The profiler will output one profile per Aesara function and profile that is the sum of the printed profiles. Each profile contains 4 sections: global info, class info, Ops info and Apply node info. -In the global section, the "Message" is the name of the Theano -function. theano.function() has an optional parameter ``name`` that +In the global section, the "Message" is the name of the Aesara +function. aesara.function() has an optional parameter ``name`` that defaults to None. Change it to something else to help you profile many -Theano functions. In that section, we also see the number of times the +Aesara functions. In that section, we also see the number of times the function was called (1) and the total time spent in all those calls. The time spent in Function.fn.__call__ and in thunks is useful -to understand Theano overhead. +to understand Aesara overhead. Also, we see the time spent in the two parts of the compilation process: optimization (modify the graph to make it more stable/faster) @@ -78,13 +78,13 @@ a graph optimization that eliminates the offending Op altogether. You should strongly consider emailing one of our lists about your issue before spending too much time on this. -Here is an example output when we disable some Theano optimizations to +Here is an example output when we disable some Aesara optimizations to give you a better idea of the difference between sections. With all optimizations enabled, there would be only one op left in the graph. 
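
As a quick, self-contained sketch of the per-function option (2) described above, compiling with ``profile=True`` and printing the summary looks roughly like this (the toy graph is illustrative only):

.. code-block:: python

    import numpy as np
    import aesara
    import aesara.tensor as tt

    x = tt.vector("x")
    # Request a per-function profile at compile time.
    f = aesara.function([x], [(2 * x).sum()], profile=True)
    f(np.random.rand(10).astype(aesara.config.floatX))
    f.profile.summary()  # prints the profile for this single function
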
to run the example: - THEANO_FLAGS=optimizer_excluding=fusion:inplace,profile=True python doc/tutorial/profiling_example.py + AESARA_FLAGS=optimizer_excluding=fusion:inplace,profile=True python doc/tutorial/profiling_example.py The output: diff --git a/doc/tutorial/profiling_example.py b/doc/tutorial/profiling_example.py index 8d22f2d4c3..692818a8d1 100644 --- a/doc/tutorial/profiling_example.py +++ b/doc/tutorial/profiling_example.py @@ -1,11 +1,11 @@ import numpy as np -import theano +import aesara -x, y, z = theano.tensor.vectors('xyz') -f = theano.function([x, y, z], [(x + y + z) * 2]) -xv = np.random.rand(10).astype(theano.config.floatX) -yv = np.random.rand(10).astype(theano.config.floatX) -zv = np.random.rand(10).astype(theano.config.floatX) +x, y, z = aesara.tensor.vectors('xyz') +f = aesara.function([x, y, z], [(x + y + z) * 2]) +xv = np.random.rand(10).astype(aesara.config.floatX) +yv = np.random.rand(10).astype(aesara.config.floatX) +zv = np.random.rand(10).astype(aesara.config.floatX) f(xv, yv, zv) diff --git a/doc/tutorial/profiling_example_out.prof b/doc/tutorial/profiling_example_out.prof index aafc00d5cb..1451d9b305 100644 --- a/doc/tutorial/profiling_example_out.prof +++ b/doc/tutorial/profiling_example_out.prof @@ -5,14 +5,14 @@ Function profiling Time in Function.fn.__call__: 1.192093e-05s (20.921%) Time in thunks: 6.198883e-06s (10.879%) Total compile time: 3.642474e+00s - Theano Optimizer time: 7.326508e-02s - Theano validate time: 3.712177e-04s - Theano Linker time (includes C, CUDA code generation/compiling): 9.584920e-01s + Aesara Optimizer time: 7.326508e-02s + Aesara validate time: 3.712177e-04s + Aesara Linker time (includes C, CUDA code generation/compiling): 9.584920e-01s Class --- <% time>