diff --git a/compiler_gym/envs/llvm/datasets/BUILD b/compiler_gym/envs/llvm/datasets/BUILD
index ec5555e968..fd865d8adc 100644
--- a/compiler_gym/envs/llvm/datasets/BUILD
+++ b/compiler_gym/envs/llvm/datasets/BUILD
@@ -8,6 +8,8 @@ py_library(
     name = "datasets",
     srcs = [
         "__init__.py",
+        "csmith.py",
+        "llvm_stress.py",
         "poj104.py",
     ],
     visibility = ["//visibility:public"],
diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py
index ce4d5a50ba..11a01e9e96 100644
--- a/compiler_gym/envs/llvm/datasets/__init__.py
+++ b/compiler_gym/envs/llvm/datasets/__init__.py
@@ -7,6 +7,8 @@
 from typing import Iterable, Optional
 
 from compiler_gym.datasets import Dataset, TarDatasetWithManifest
+from compiler_gym.envs.llvm.datasets.csmith import CsmithBenchmark, CsmithDataset
+from compiler_gym.envs.llvm.datasets.llvm_stress import LlvmStressDataset
 from compiler_gym.envs.llvm.datasets.poj104 import POJ104Dataset, POJ104LegacyDataset
 from compiler_gym.util.runfiles_path import site_data_path
 
@@ -200,8 +202,10 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
     site_data_base = site_data_base or site_data_path("llvm-v0")
 
     yield BlasDataset(site_data_base=site_data_base, sort_order=0)
+    yield CsmithDataset(site_data_base=site_data_base, sort_order=0)
     yield GitHubDataset(site_data_base=site_data_base, sort_order=0)
     yield LinuxDataset(site_data_base=site_data_base, sort_order=0)
+    yield LlvmStressDataset(site_data_base=site_data_base, sort_order=0)
     yield MibenchDataset(site_data_base=site_data_base, sort_order=0)
     yield NPBDataset(site_data_base=site_data_base, sort_order=0)
     yield OpenCVDataset(site_data_base=site_data_base, sort_order=0)
@@ -212,9 +216,12 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
 
 __all__ = [
     "BlasDataset",
+    "CsmithDataset",
+    "CsmithBenchmark",
     "get_llvm_datasets",
     "GitHubDataset",
     "LinuxDataset",
+    "LlvmStressDataset",
     "MibenchDataset",
     "NPBDataset",
     "OpenCVDataset",
diff --git a/compiler_gym/envs/llvm/datasets/csmith.py b/compiler_gym/envs/llvm/datasets/csmith.py
new file mode 100644
index 0000000000..1774623521
--- /dev/null
+++ b/compiler_gym/envs/llvm/datasets/csmith.py
@@ -0,0 +1,268 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import io
+import logging
+import subprocess
+import tarfile
+import tempfile
+from pathlib import Path
+from threading import Lock
+from typing import Iterable, List, Optional
+
+from fasteners import InterProcessLock
+
+from compiler_gym.datasets import Benchmark, BenchmarkSource, Dataset
+from compiler_gym.datasets.benchmark import BenchmarkInitError, BenchmarkWithSource
+from compiler_gym.datasets.dataset import DatasetInitError
+from compiler_gym.envs.llvm.llvm_benchmark import ClangInvocation
+from compiler_gym.util.decorators import memoized_property
+from compiler_gym.util.download import download
+from compiler_gym.util.runfiles_path import transient_cache_path
+from compiler_gym.util.truncate import truncate
+
+# Maximum csmith --seed value; benchmark_uris() enumerates the seeds below it.
+UINT_MAX = (2 ** 32) - 1
+
+
+class CsmithBenchmark(BenchmarkWithSource):
+    """A CSmith benchmark that also carries the C source it was built from."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._src = None  # Raw C source bytes; assigned by create().
+
+    @classmethod
+    def create(cls, uri: str, bitcode: bytes, src: bytes) -> Benchmark:
+        """Create a benchmark from in-memory bitcode and C source bytes."""
+        benchmark = cls.from_file_contents(uri, bitcode)
+        benchmark._src = src  # pylint: disable=protected-access
+        return benchmark
+
+    @memoized_property
+    def sources(self) -> Iterable[BenchmarkSource]:
+        return [
+            BenchmarkSource(filename="source.c", contents=self._src),
+        ]
+
+    @property
+    def source(self) -> str:
+        """Return the single source file contents as a UTF-8 decoded string."""
+        return self._src.decode("utf-8")
+
+
+class CsmithDataset(Dataset):
+    """A dataset which uses Csmith to generate programs.
+
+    Csmith is a tool that can generate random conformant C99 programs. It is
+    described in the publication:
+
+        Yang, Xuejun, Yang Chen, Eric Eide, and John Regehr. "Finding and
+        understanding bugs in C compilers." In Proceedings of the 32nd ACM
+        SIGPLAN conference on Programming Language Design and Implementation
+        (PLDI), pp. 283-294. 2011.
+
+    For up-to-date information about Csmith, see:
+
+        https://embed.cs.utah.edu/csmith/
+
+    Note that Csmith is a tool that is used to find errors in compilers. As
+    such, there is a higher likelihood that the benchmark cannot be used for an
+    environment and that :meth:`env.reset()
+    <compiler_gym.envs.CompilerEnv.reset>` will raise
+    :class:`compiler_gym.datasets.BenchmarkInitError`.
+
+    Installation
+    ------------
+
+    Using the CsmithDataset requires building the Csmith binary from source.
+    This is done automatically on the first call to :code:`install()`. Building
+    Csmith requires a working C++ toolchain. Install the required dependencies
+    using: :code:`sudo apt install -y g++ m4` on Linux, or :code:`brew install
+    m4` on macOS. :class:`DatasetInitError` is raised if compilation fails. See
+    the `Csmith repo <https://github.com/csmith-project/csmith#install-csmith>`_
+    for further details.
+    """
+
+    def __init__(self, site_data_base: Path, sort_order: int = 0):
+        super().__init__(
+            name="generator://csmith-v0",
+            description="Random conformant C99 programs",
+            references={
+                "Paper": "http://web.cse.ohio-state.edu/~rountev.1/5343/pdf/pldi11.pdf",
+                "Homepage": "https://embed.cs.utah.edu/csmith/",
+            },
+            license="BSD",
+            site_data_base=site_data_base,
+            sort_order=sort_order,
+            benchmark_class=CsmithBenchmark,
+        )
+        self.csmith_path = self.site_data_path / "bin" / "csmith"
+        csmith_include_dir = self.site_data_path / "include" / "csmith-2.3.0"
+
+        self._installed = False
+        self._build_lock = Lock()  # Guards the build against other threads.
+        self._build_lockfile = self.site_data_path / "build.LOCK"
+        self._build_markerfile = self.site_data_path / ".built"
+
+        # The command that is used to compile an LLVM-IR bitcode file from a
+        # Csmith input. Reads from stdin, writes to stdout.
+        self.clang_compile_command: List[str] = ClangInvocation.from_c_file(
+            "-",  # Read from stdin.
+            copt=[
+                "-xc",
+                "-ferror-limit=1",  # Stop on first error.
+                "-w",  # No warnings.
+                f"-I{csmith_include_dir}",  # Include the Csmith headers.
+            ],
+        ).command(
+            outpath="-"
+        )  # Write to stdout.
+
+    @property
+    def installed(self) -> bool:
+        # Fast path for repeated checks to 'installed' without a disk op.
+        if not self._installed:
+            self._installed = self._build_markerfile.is_file()
+        return self._installed
+
+    def install(self) -> None:
+        """Download and build the Csmith binary, unless already installed."""
+        if self.installed:
+            return
+
+        with self._build_lock, InterProcessLock(self._build_lockfile):
+            # Another thread or process may have finished the build while we
+            # were waiting on the locks, so check again before building.
+            if not self.installed:
+                self.logger.info("Downloading and building Csmith")
+                self._build_csmith(self.site_data_path, self.logger)
+
+    @staticmethod
+    def _build_csmith(install_root: Path, logger: logging.Logger):
+        """Download, build, and install Csmith to the given directory."""
+        tar_data = io.BytesIO(
+            download(
+                urls=[
+                    "https://github.com/csmith-project/csmith/archive/refs/tags/csmith-2.3.0.tar.gz",
+                ],
+                sha256="ba871c1e5a05a71ecd1af514fedba30561b16ee80b8dd5ba8f884eaded47009f",
+            )
+        )
+        # Csmith uses a standard `configure` + `make install` build process.
+        with tempfile.TemporaryDirectory(
+            dir=transient_cache_path("."), prefix="csmith-"
+        ) as d:
+            with tarfile.open(fileobj=tar_data, mode="r:gz") as arc:
+                arc.extractall(d)
+
+            # The path of the extracted sources.
+            src_dir = Path(d) / "csmith-csmith-2.3.0"
+
+            logger.debug("Configuring Csmith at %s", d)
+            configure = subprocess.Popen(
+                ["./configure", f"--prefix={install_root}"],
+                cwd=src_dir,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                universal_newlines=True,
+            )
+            stdout, stderr = configure.communicate(timeout=600)
+            if configure.returncode:
+                raise DatasetInitError(
+                    "\n".join(
+                        [
+                            "Failed to build Csmith from source, `./configure` failed.",
+                            "You may be missing installation dependencies. Install them using:",
+                            "     linux: `sudo apt install g++ m4`",
+                            "     macOS: `brew install m4`",
+                            "See https://github.com/csmith-project/csmith#install-csmith for more details",
+                            "--- Start `./configure` logs: ---\n",
+                            stdout,
+                            stderr,
+                        ]
+                    )
+                )
+
+            logger.debug("Installing Csmith to %s", install_root)
+            make = subprocess.Popen(
+                ["make", "-j", "install"],
+                cwd=src_dir,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                universal_newlines=True,
+            )
+            stdout, stderr = make.communicate(timeout=600)
+            if make.returncode:
+                raise DatasetInitError(
+                    "\n".join(
+                        [
+                            "Failed to build Csmith from source, `make install` failed.",
+                            "You may be missing installation dependencies. Install them using:",
+                            "     linux: `sudo apt install g++ m4`",
+                            "     macOS: `brew install m4`",
+                            "See https://github.com/csmith-project/csmith#install-csmith for more details",
+                            "--- Start `make install` logs: ---\n",
+                            stdout,
+                            stderr,
+                        ]
+                    )
+                )
+
+            (install_root / ".built").touch()
+
+    @property
+    def size(self) -> int:
+        # Actually 2^32 - 1, but practically infinite for all intents and
+        # purposes.
+        return float("inf")
+
+    def benchmark_uris(self) -> Iterable[str]:
+        return (f"{self.name}/{i}" for i in range(UINT_MAX))
+
+    def benchmark(self, uri: Optional[str] = None) -> CsmithBenchmark:
+        """Generate a benchmark from the seed in `uri`, or from a random seed."""
+        self.install()
+
+        if uri is None or len(uri) <= len(self.name) + 1:
+            seed = self.random.integers(UINT_MAX)
+        else:
+            seed = int(uri.split("/")[-1])
+
+        # Run csmith with the given seed; its output is then compiled by clang.
+        self.logger.debug("Exec csmith --seed %d", seed)
+        csmith = subprocess.Popen(
+            [str(self.csmith_path), "--seed", str(seed)],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,  # Captured so a failure can be reported.
+        )
+
+        # Generate the C source.
+        src, stderr = csmith.communicate(timeout=300)
+        if csmith.returncode:
+            error = truncate(stderr.decode("utf-8"), max_lines=20, max_line_len=100)
+            raise OSError(f"Csmith failed with seed {seed}\nError: {error}")
+
+        # Compile to IR.
+        clang = subprocess.Popen(
+            self.clang_compile_command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        stdout, stderr = clang.communicate(src, timeout=300)
+
+        # The csmith exit status was already checked above, so only the clang
+        # result remains to be validated here.
+        if clang.returncode:
+            compile_cmd = " ".join(self.clang_compile_command)
+            error = truncate(stderr.decode("utf-8"), max_lines=20, max_line_len=100)
+            raise BenchmarkInitError(
+                f"Compilation job failed!\n"
+                f"Csmith seed: {seed}\n"
+                f"Command: {compile_cmd}\n"
+                f"Error: {error}"
+            )
+
+        return self.benchmark_class.create(f"{self.name}/{seed}", stdout, src)
diff --git a/compiler_gym/envs/llvm/datasets/llvm_stress.py b/compiler_gym/envs/llvm/datasets/llvm_stress.py
new file mode 100644
index 0000000000..159f6e0756
--- /dev/null
+++ b/compiler_gym/envs/llvm/datasets/llvm_stress.py
@@ -0,0 +1,80 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import subprocess
+from pathlib import Path
+from typing import Iterable, Optional
+
+from compiler_gym.datasets import Benchmark, Dataset
+from compiler_gym.datasets.benchmark import BenchmarkInitError
+from compiler_gym.third_party import llvm
+
+# Maximum llvm-stress --seed value; benchmark_uris() enumerates seeds below it.
+UINT_MAX = (2 ** 32) - 1
+
+
+class LlvmStressDataset(Dataset):
+    """A dataset which uses llvm-stress to generate programs.
+
+    `llvm-stress <https://llvm.org/docs/CommandGuide/llvm-stress.html>`_ is a
+    tool for generating random LLVM-IR files.
+
+    This dataset forces reproducible results by setting the input seed to the
+    generator. The benchmark's URI is the seed, e.g.
+    "generator://llvm-stress-v0/10" is the benchmark generated by llvm-stress
+    using seed 10. The total number of unique seeds is 2^32 - 1.
+
+    Note that llvm-stress is a tool that is used to find errors in LLVM. As
+    such, there is a higher likelihood that the benchmark cannot be used for an
+    environment and that :meth:`env.reset()
+    <compiler_gym.envs.CompilerEnv.reset>` will raise
+    :class:`compiler_gym.datasets.BenchmarkInitError`.
+    """
+
+    def __init__(self, site_data_base: Path, sort_order: int = 0):
+        super().__init__(
+            name="generator://llvm-stress-v0",
+            description="Randomly generated LLVM-IR",
+            references={
+                "Documentation": "https://llvm.org/docs/CommandGuide/llvm-stress.html"
+            },
+            license="Apache License v2.0 with LLVM Exceptions",
+            site_data_base=site_data_base,
+            sort_order=sort_order,
+        )
+
+    @property
+    def size(self) -> int:
+        # Actually 2^32 - 1, but practically infinite for all intents and
+        # purposes.
+        return float("inf")
+
+    def benchmark_uris(self) -> Iterable[str]:
+        return (f"{self.name}/{i}" for i in range(UINT_MAX))
+
+    def benchmark(self, uri: Optional[str] = None) -> Benchmark:
+        """Generate a benchmark from the seed in `uri`, or from a random seed."""
+        if uri is None or len(uri) <= len(self.name) + 1:
+            seed = self.random.integers(UINT_MAX)
+        else:
+            seed = int(uri.split("/")[-1])
+
+        # Pipe the seeded llvm-stress output into llvm-as to assemble a bitcode.
+        llvm_stress = subprocess.Popen(
+            [str(llvm.llvm_stress_path()), f"--seed={seed}"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,  # Never read; an unread pipe could block.
+        )
+        llvm_as = subprocess.Popen(
+            [str(llvm.llvm_as_path()), "-"],
+            stdin=llvm_stress.stdout,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        llvm_stress.stdout.close()  # Leave llvm-as as the pipe's only reader.
+        stdout, _ = llvm_as.communicate(timeout=60)
+        # returncode stays None until the process is reaped, so wait() for it.
+        if llvm_stress.wait(timeout=60) or llvm_as.returncode:
+            raise BenchmarkInitError("Failed to generate benchmark")
+        return Benchmark.from_file_contents(f"{self.name}/{seed}", stdout)
diff --git a/tests/llvm/datasets/BUILD b/tests/llvm/datasets/BUILD
index 760657026d..d3e570e347 100644
--- a/tests/llvm/datasets/BUILD
+++ b/tests/llvm/datasets/BUILD
@@ -4,6 +4,20 @@
 # LICENSE file in the root directory of this source tree.
 load("@rules_python//python:defs.bzl", "py_test")
 
+py_test(
+    name = "csmith_test",
+    timeout = "long",
+    srcs = ["csmith_test.py"],
+    shard_count = 8,
+    deps = [
+        "//compiler_gym/envs/llvm",
+        "//compiler_gym/envs/llvm/datasets",
+        "//tests:test_main",
+        "//tests/pytest_plugins:common",
+        "//tests/pytest_plugins:llvm",
+    ],
+)
+
 py_test(
     name = "github_test",
     timeout = "long",
@@ -18,6 +32,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "llvm_stress_test",
+    timeout = "long",
+    srcs = ["llvm_stress_test.py"],
+    deps = [
+        "//compiler_gym/envs/llvm",
+        "//compiler_gym/envs/llvm/datasets",
+        "//tests:test_main",
+        "//tests/pytest_plugins:common",
+        "//tests/pytest_plugins:llvm",
+    ],
+)
+
 py_test(
     name = "poj104_test",
     timeout = "long",
diff --git a/tests/llvm/datasets/csmith_test.py b/tests/llvm/datasets/csmith_test.py
new file mode 100644
index 0000000000..119820dd0f
--- /dev/null
+++ b/tests/llvm/datasets/csmith_test.py
@@ -0,0 +1,50 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Tests for the Csmith dataset."""
+from pathlib import Path
+
+import gym
+import pytest
+
+import compiler_gym.envs.llvm  # noqa register environments
+from compiler_gym.envs.llvm import LlvmEnv
+from compiler_gym.envs.llvm.datasets import CsmithBenchmark, CsmithDataset
+from tests.pytest_plugins.common import skip_on_ci
+from tests.test_main import main
+
+pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"]
+
+
+@pytest.fixture(scope="module")
+def csmith_dataset() -> CsmithDataset:
+    llvm_env = gym.make("llvm-v0")  # Only needed to look the dataset up.
+    try:
+        dataset = llvm_env.datasets["generator://csmith-v0"]
+    finally:
+        llvm_env.close()
+    yield dataset
+
+
+def test_csmith_size(csmith_dataset: CsmithDataset):
+    assert csmith_dataset.size == float("inf")  # Generator: unbounded size.
+
+
+@skip_on_ci
+@pytest.mark.parametrize("seed", range(250))
+def test_csmith_random_select(
+    env: LlvmEnv, csmith_dataset: CsmithDataset, seed: int, tmpwd: Path
+):
+    csmith_dataset.seed(seed)
+    generated = csmith_dataset.benchmark()  # No URI given: random selection.
+    assert isinstance(generated, CsmithBenchmark)
+    env.reset(benchmark=generated)
+
+    assert generated.source
+    generated.write_sources_to_directory(tmpwd)
+    assert tmpwd.joinpath("source.c").is_file()
+
+
+if __name__ == "__main__":
+    main()  # Entry point imported from tests.test_main.
diff --git a/tests/llvm/datasets/llvm_stress_test.py b/tests/llvm/datasets/llvm_stress_test.py
new file mode 100644
index 0000000000..07f4156f9a
--- /dev/null
+++ b/tests/llvm/datasets/llvm_stress_test.py
@@ -0,0 +1,46 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Tests for the llvm-stress dataset."""
+import gym
+import pytest
+
+import compiler_gym.envs.llvm  # noqa register environments
+from compiler_gym.envs.llvm import LlvmEnv
+from compiler_gym.envs.llvm.datasets import LlvmStressDataset
+from tests.pytest_plugins.common import skip_on_ci
+from tests.test_main import main
+
+pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"]
+
+
+@pytest.fixture(scope="module")
+def llvm_stress_dataset() -> LlvmStressDataset:
+    llvm_env = gym.make("llvm-v0")  # Only needed to look the dataset up.
+    try:
+        dataset = llvm_env.datasets["generator://llvm-stress-v0"]
+    finally:
+        llvm_env.close()
+    yield dataset
+
+
+def test_llvm_stress_size(llvm_stress_dataset: LlvmStressDataset):
+    assert llvm_stress_dataset.size == float("inf")  # Generator: unbounded size.
+
+
+@skip_on_ci
+@pytest.mark.parametrize("seed", range(250))
+def test_llvm_stress_random_select(
+    env: LlvmEnv, llvm_stress_dataset: LlvmStressDataset, seed: int
+):
+    llvm_stress_dataset.seed(seed)
+    generated = llvm_stress_dataset.benchmark()
+    env.observation_space = "InstCountDict"
+    counts = env.reset(benchmark=generated)
+    print(env.ir)  # Printed so a failing run shows the IR that was loaded.
+    assert counts["TotalInstsCount"] > 0
+
+
+if __name__ == "__main__":
+    main()  # Entry point imported from tests.test_main.