diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fcc6b992..74707af6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,12 +15,37 @@ jobs:
           python-version: 3.8
           environment-file: environment.yml
           activate-environment: lecture-datascience
+      - name: Install myst-nb
+        run: |
+          pip install git+https://github.com/executablebooks/MyST-NB@master
+      - name: Install sphinx
+        run: |
+          pip install sphinx==3.5.0
+      - name: Install latex dependencies
+        run: |
+          sudo apt-get -qq update
+          sudo apt-get install -y \
+            texlive-latex-recommended \
+            texlive-latex-extra \
+            texlive-fonts-recommended \
+            texlive-fonts-extra \
+            texlive-xetex \
+            latexmk \
+            xindy \
+            imagemagick
       - name: Display Conda Environment Versions
         shell: bash -l {0}
         run: conda list
       - name: Display Pip Versions
         shell: bash -l {0}
         run: pip list
+      # Build Assets (Download Notebooks and PDF via LaTeX)
+      - name: Build PDF from LaTeX
+        shell: bash -l {0}
+        run: |
+          jb build lectures --builder pdflatex --path-output ./ -n --keep-going
+          mkdir -p _build/html/_pdf
+          cp -u _build/latex/*.pdf _build/html/_pdf
       - name: Download "build" folder (cache)
         uses: dawidd6/action-download-artifact@v2
         with:
@@ -49,6 +74,11 @@
         run: |
           mkdir -p _build/html/assets/data
           cp -a lectures/_data/. _build/html/assets/data/
+      - name: Save Build as Artifact
+        uses: actions/upload-artifact@v1
+        with:
+          name: _build
+          path: _build
       - name: Preview Deploy to Netlify
         uses: nwtgck/actions-netlify@v1.1
         with:
@@ -58,4 +88,4 @@
           deploy-message: "Preview Deploy from GitHub Actions"
         env:
           NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
-          NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
+          NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 7852bcbf..87692d06 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,13 +6,14 @@ dependencies:
   - anaconda=2021.05
   - pip
   - pip:
-    - jupyter-book>=0.12.1,<0.13
+    - sphinx==3.5.0
+    - jupyter-book>=0.12.1,<0.13.0
     - sphinxext-rediraffe
     - sphinx-multitoc-numbering
-    - sphinx-exercise
+    - sphinx-exercise==0.2.1
     - joblib
     - interpolation
-    - sphinx-tojupyter
+    - sphinx-tojupyter==0.1.2
     - fiona
     - geopandas
     - pyLDAvis >= 3.3.0
diff --git a/lectures/_config.yml b/lectures/_config.yml
index 2eb21f74..bc628f10 100644
--- a/lectures/_config.yml
+++ b/lectures/_config.yml
@@ -10,6 +10,10 @@ execute:
 html:
   baseurl: https://datascience.quantecon.org/

+latex:
+  latex_documents:
+    targetname: datascience-book.tex
+
 sphinx:
   extra_extensions: [sphinx_multitoc_numbering, sphinx_exercise, sphinx_tojupyter]
   config:
@@ -32,9 +36,9 @@ sphinx:
       binderhub_url: https://mybinder.org
       google_analytics_id: UA-54984338-5
       persistent_sidebar: true
-    mathjax2_config:
+    mathjax_config:
       TeX:
-        extensions: ["autobold.js"]
+        extensions: ["autobold.js", "text-macros.js"]
         Macros:
           "argmax" : "arg\\,max"
           "argmin" : "arg\\,min"
@@ -45,7 +49,7 @@ sphinx:
       SVG:
         scale: 0.92,
         useGlobalCache: true
-    mathjax_path: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML
+    mathjax_path: https://cdn.jsdelivr.net/npm/mathjax@2.7.9/MathJax.js?config=TeX-MML-AM_CHTML
     tojupyter_static_file_path: ["_static"]
     tojupyter_target_html: true
     tojupyter_urlpath: "https://datascience.quantecon.org/"
diff --git a/lectures/applications/heterogeneity.md b/lectures/applications/heterogeneity.md
index 84bb4cfa..28e91f80 100644
--- a/lectures/applications/heterogeneity.md
+++ b/lectures/applications/heterogeneity.md
@@ -101,12 +101,12 @@ When treatment is
 randomly assigned, we can estimate average treatment effects because

 $$
-\begin{align*}
+\begin{aligned}
 E[y_i(1) - y_i(0) ] = & E[y_i(1)] - E[y_i(0)] \\
 & \text{random assignment } \\
 = & E[y_i(1) | d_i = 1] - E[y_i(0) | d_i = 0] \\
 = & E[y_i | d_i = 1] - E[y_i | d_i = 0 ]
-\end{align*}
+\end{aligned}
 $$

 ### Average Treatment Effects
@@ -164,12 +164,12 @@ logic that lets us estimate unconditional average treatment
 effects also suggests that we can estimate conditional average treatment effects.

 $$
-\begin{align*}
+\begin{aligned}
 E[y_i(1) - y_i(0) |X_i=x] = & E[y_i(1)|X_i = x] - E[y_i(0)|X_i=x] \\
 & \text{random assignment } \\
 = & E[y_i(1) | d_i = 1, X_i=x] - E[y_i(0) | d_i = 0, X_i=x] \\
 = & E[y_i | d_i = 1, X_i = x] - E[y_i | d_i = 0, X_i=x ]
-\end{align*}
+\end{aligned}
 $$

 Conditional average treatment effects tell us whether there are
@@ -209,7 +209,6 @@ $S(x)$ approximates
 $s_0(x)$ is to look at the best linear projection of
 $s_0(x)$ on $S(x)$.

 $$
-\DeclareMathOperator*{\argmin}{arg\,min}
 \beta_0, \beta_1 = \argmin_{b_0,b_1} E[(s_0(x) - b_0 - b_1 (S(x)-E[S(x)]))^2]
 $$
diff --git a/lectures/applications/regression.md b/lectures/applications/regression.md
index 0545d969..e3d5f81e 100644
--- a/lectures/applications/regression.md
+++ b/lectures/applications/regression.md
@@ -123,7 +123,7 @@ only the livable square footage of the home.
 The linear regression model for this situation is

 $$
-\log(\text{price}) = \beta_0 + \beta_1 \text{sqft_living} + \epsilon
+\log(\text{price}) = \beta_0 + \beta_1 \text{sqft\_living} + \epsilon
 $$

 $\beta_0$ and $\beta_1$ are called parameters (also coefficients or
@@ -132,14 +132,14 @@ that best fit the data.

 $\epsilon$ is the error term. It would be unusual for the observed
 $\log(\text{price})$ to be an exact linear function of
-$\text{sqft_living}$. The error term captures the deviation of
-$\log(\text{price})$ from a linear function of $\text{sqft_living}$.
+$\text{sqft\_living}$. The error term captures the deviation of
+$\log(\text{price})$ from a linear function of $\text{sqft\_living}$.

 The linear regression algorithm will choose the parameters that minimize the
 *mean squared error* (MSE) function, which for our example is written.

 $$
-\frac{1}{N} \sum_{i=1}^N \left(\log(\text{price}_i) - (\beta_0 + \beta_1 \text{sqft_living}_i) \right)^2
+\frac{1}{N} \sum_{i=1}^N \left(\log(\text{price}_i) - (\beta_0 + \beta_1 \text{sqft\_living}_i) \right)^2
 $$

 The output of this algorithm is the straight line (hence linear) that passes as
@@ -218,7 +218,7 @@ Suppose that in addition to `sqft_living`, we also wanted to use the `bathrooms`
 In this case, the linear regression model is

 $$
-\log(\text{price}) = \beta_0 + \beta_1 \text{sqft_living} +
+\log(\text{price}) = \beta_0 + \beta_1 \text{sqft\_living} +
 \beta_2 \text{bathrooms} + \epsilon
 $$

@@ -227,7 +227,7 @@ We could keep adding one variable at a time, along with a new $\beta_{j}$ coeffi
 Let's write this equation in vector/matrix form as

 $$
-\underbrace{\begin{bmatrix} \log(\text{price}_1) \\ \log(\text{price}_2) \\ \vdots \\ \log(\text{price}_N)\end{bmatrix}}_Y = \underbrace{\begin{bmatrix} 1 & \text{sqft_living}_1 & \text{bathrooms}_1 \\ 1 & \text{sqft_living}_2 & \text{bathrooms}_2 \\ \vdots & \vdots & \vdots \\ 1 & \text{sqft_living}_N & \text{bathrooms}_N \end{bmatrix}}_{X} \underbrace{\begin{bmatrix} \beta_0 \\ \beta_1 \\ \beta_2 \end{bmatrix}}_{\beta} + \epsilon
+\underbrace{\begin{bmatrix} \log(\text{price}_1) \\ \log(\text{price}_2) \\ \vdots \\ \log(\text{price}_N)\end{bmatrix}}_Y = \underbrace{\begin{bmatrix} 1 & \text{sqft\_living}_1 & \text{bathrooms}_1 \\ 1 & \text{sqft\_living}_2 & \text{bathrooms}_2 \\ \vdots & \vdots & \vdots \\ 1 & \text{sqft\_living}_N & \text{bathrooms}_N \end{bmatrix}}_{X} \underbrace{\begin{bmatrix} \beta_0 \\ \beta_1 \\ \beta_2 \end{bmatrix}}_{\beta} + \epsilon
 $$

 Notice that we can add as many columns to $X$ as we'd like and the linear
diff --git a/lectures/problem_sets/problem_set_3.md b/lectures/problem_sets/problem_set_3.md
index baa5fb79..dfcc6cb2 100644
--- a/lectures/problem_sets/problem_set_3.md
+++ b/lectures/problem_sets/problem_set_3.md
@@ -197,10 +197,10 @@ face value $M$, yield to maturity $i$, and periods to maturity $N$ is

 $$
-\begin{align*}
+\begin{aligned}
 P &= \left(\sum_{n=1}^N \frac{C}{(i+1)^n}\right) + \frac{M}{(1+i)^N} \\
 &= C \left(\frac{1 - (1+i)^{-N}}{i} \right) + M(1+i)^{-N}
-\end{align*}
+\end{aligned}
 $$

 In the code cell below, we have defined variables for `i`, `M` and `C`.

diff --git a/lectures/python_fundamentals/functions.md b/lectures/python_fundamentals/functions.md
index c0c460c0..cdf28262 100644
--- a/lectures/python_fundamentals/functions.md
+++ b/lectures/python_fundamentals/functions.md
@@ -633,10 +633,10 @@ that can be interchanged. That is, the following are identical.

 $$
-\begin{eqnarray}
+\begin{aligned}
 f(K, L) &= z\, K^{\alpha} L^{1-\alpha}\\
 f(K_2, L_2) &= z\, K_2^{\alpha} L_2^{1-\alpha}
-\end{eqnarray}
+\end{aligned}
 $$

 The same concept applies to Python functions, where the arguments are just

diff --git a/lectures/scientific/applied_linalg.md b/lectures/scientific/applied_linalg.md
index 44e1e2fa..45e7cecc 100644
--- a/lectures/scientific/applied_linalg.md
+++ b/lectures/scientific/applied_linalg.md
@@ -343,11 +343,11 @@ $\begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix}$ then we can multiply both sides b
 to get

 $$
-\begin{align*}
+\begin{aligned}
 \begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix}^{-1}\begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix} \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} &= \begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix}^{-1}\begin{bmatrix} 3 \\ 4 \end{bmatrix} \\
 I \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} &= \begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix}^{-1} \begin{bmatrix} 3 \\ 4 \end{bmatrix} \\
 \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} &= \begin{bmatrix} 1 & 2 \\ 3 & 1 \end{bmatrix}^{-1} \begin{bmatrix} 3 \\ 4 \end{bmatrix}
-\end{align*}
+ \end{aligned}
 $$

 Computing the inverse requires that a matrix be square and satisfy some other conditions
diff --git a/lectures/scientific/numpy_arrays.md b/lectures/scientific/numpy_arrays.md
index a01addc6..072e5878 100644
--- a/lectures/scientific/numpy_arrays.md
+++ b/lectures/scientific/numpy_arrays.md
@@ -521,10 +521,10 @@ face value $M$, yield to maturity $i$, and periods to maturity $N$ is

 $$
-\begin{align*}
+\begin{aligned}
 P &= \left(\sum_{n=1}^N \frac{C}{(i+1)^n}\right) + \frac{M}{(1+i)^N} \\
 &= C \left(\frac{1 - (1+i)^{-N}}{i} \right) + M(1+i)^{-N}
-\end{align*}
+\end{aligned}
 $$

 In the code cell below, we have defined variables for `i`, `M` and `C`.
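The last two hunks reformat the same bond pricing derivation. As a quick numerical cross-check of that formula, here is a minimal NumPy sketch; the values assigned to `i`, `M`, `C`, and `N` below are illustrative assumptions, not the ones defined in the lectures' code cells.

```python
import numpy as np

# Illustrative inputs -- the lecture notebooks define their own i, M, and C.
i = 0.05   # yield to maturity per period
M = 100.0  # face value
C = 4.0    # coupon payment per period
N = 20     # periods to maturity

# First line of the derivation: discounted coupon stream plus discounted face value.
P_sum = np.sum(C / (1 + i) ** np.arange(1, N + 1)) + M / (1 + i) ** N

# Second line: the closed-form annuity expression.
P_closed = C * (1 - (1 + i) ** (-N)) / i + M * (1 + i) ** (-N)

print(P_sum, P_closed)  # the two expressions agree up to floating-point error
```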
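Similarly, the derivation in the `applied_linalg.md` hunk (left-multiplying both sides of $Ax = b$ by $A^{-1}$) can be reproduced numerically. A short sketch using the same $2 \times 2$ system, offered only as a reference point for that derivation:

```python
import numpy as np

A = np.array([[1.0, 2.0],
              [3.0, 1.0]])
b = np.array([3.0, 4.0])

# Mirror the derivation: left-multiply both sides by the inverse of A.
x_via_inverse = np.linalg.inv(A) @ b

# Numerically preferable in practice: solve the system without forming the inverse.
x_via_solve = np.linalg.solve(A, b)

print(x_via_inverse, x_via_solve)  # both return the same solution, [1. 1.]
```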