Skip to content

Commit

Permalink
Caching for Terraform! (#21221)
Browse files Browse the repository at this point in the history
This MR adds caching for Terraform! Specifically, it caches the providers, which are
the things that need to be downloaded. It uses Terraform's [provider
cache](https://developer.hashicorp.com/terraform/cli/config/config-file#provider-plugin-cache).

Pulling in "hashicorp/azurerm" and "hashicorp/azuread" (200 MB in total) goes
from 22s to 2s (33s to 13s including Pants startup).
  • Loading branch information
lilatomic authored Aug 4, 2024
1 parent 18c9301 commit 459d39b
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 29 deletions.
3 changes: 2 additions & 1 deletion docs/docs/terraform/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
:::caution Terraform support is in alpha stage
Pants is currently building support for developing and deploying Terraform. Simple use cases might be supported, but many options are missing.

Please share feedback for what you need to use Pants with your Terraform modules and deployments by either [opening a GitHub issue](https://github.com/pantsbuild/pants/issues/new/choose) or [joining our Slack](/community/getting-help)!
Terraform release progress is tracked in the [stability for release issue](https://github.com/pantsbuild/pants/issues/21119).
Please share feedback for what you need to use Pants with your Terraform modules and deployments by commenting on that issue, [opening a new GitHub issue](https://github.com/pantsbuild/pants/issues/new/choose) or [joining our Slack](/community/getting-help)!
:::

## Initial setup
Expand Down
2 changes: 2 additions & 0 deletions docs/notes/2.23.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ The `python_aws_lambda_function` and `python_aws_lambda_layer` targets now allow

#### Terraform

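The Terraform backend now caches providers, using Terraform's [provider plugin cache](https://developer.hashicorp.com/terraform/cli/config/config-file#provider-plugin-cache).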

The default version of terraform has been updated from 1.7.1 to 1.9.0.

The `tfsec` linter now works on all supported platforms without extra config.
Expand Down
9 changes: 3 additions & 6 deletions src/python/pants/backend/terraform/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import os
import os.path
from dataclasses import dataclass
from typing import Optional
Expand Down Expand Up @@ -88,12 +89,8 @@ async def get_terraform_providers(
),
)
if fetched_deps.exit_code != 0:
raise ProcessExecutionFailure(
fetched_deps.exit_code,
fetched_deps.stdout,
fetched_deps.stderr,
init_process_description,
keep_sandboxes=keep_sandboxes,
raise ProcessExecutionFailure.from_result(
fetched_deps, init_process_description, keep_sandboxes
)

return TerraformDependenciesResponse(fetched_deps.output_digest)
Expand Down
55 changes: 36 additions & 19 deletions src/python/pants/backend/terraform/dependencies_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import dataclasses
import json
import textwrap
Expand All @@ -14,7 +16,7 @@
standard_deployment,
terraform_lockfile,
)
from pants.engine.fs import DigestContents, FileContent
from pants.engine.fs import DigestContents, DigestEntries, FileContent, SymlinkEntry
from pants.engine.internals.native_engine import Address
from pants.testutil.rule_runner import RuleRunner

Expand All @@ -24,7 +26,7 @@

def _do_init_terraform(
rule_runner: RuleRunner, standard_deployment: StandardDeployment, initialise_backend: bool
) -> DigestContents:
) -> tuple[DigestContents, DigestEntries]:
rule_runner.write_files(standard_deployment.files)
target = rule_runner.get_target(standard_deployment.target)
field_set = DeployTerraformFieldSet.create(target)
Expand All @@ -39,17 +41,30 @@ def _do_init_terraform(
],
)
initialised_files = rule_runner.request(DigestContents, [result.sources_and_deps])
initialised_entries = rule_runner.request(DigestEntries, [result.sources_and_deps])
assert isinstance(initialised_files, DigestContents)
return initialised_files
return initialised_files, initialised_entries


def find_file(files: DigestContents, pattern: str) -> Optional[FileContent]:
    """Return the first entry of `files` whose path matches `pattern`, or None.

    Matching is done with `pathlib.Path.match`, so a relative pattern is
    compared against the trailing components of each file's path.
    """
    for candidate in files:
        if Path(candidate.path).match(pattern):
            return candidate
    return None


def find_link(entries: DigestEntries, pattern: str) -> Optional[SymlinkEntry]:
    """Return the first symlink entry whose path matches `pattern`, or None.

    Entries that are not `SymlinkEntry` instances are ignored.
    """
    symlinks = (entry for entry in entries if isinstance(entry, SymlinkEntry))
    # `Path.match` compares a relative pattern against the trailing path
    # components, so any leading prefix on the entry's path is accepted.
    return next((link for link in symlinks if Path(link.path).match(pattern)), None)


def test_init_terraform(rule_runner: RuleRunner, standard_deployment: StandardDeployment) -> None:
"""Test for the happy path of initialising Terraform with a backend config."""
initialised_files = _do_init_terraform(
initialised_files, initialised_links = _do_init_terraform(
rule_runner, standard_deployment, initialise_backend=True
)

Expand All @@ -60,9 +75,9 @@ def test_init_terraform(rule_runner: RuleRunner, standard_deployment: StandardDe
assert stub_tfstate["backend"]["config"]["path"] == str(standard_deployment.state_file)

# Assert dependencies are initialised by checking for the dependency itself
assert find_file(
initialised_files,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*/terraform-provider-null*",
assert find_link(
initialised_links,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*",
), "Did not find expected provider"

# Assert lockfile is included
Expand All @@ -80,7 +95,7 @@ def test_init_terraform_uses_lockfiles(
files={**standard_deployment.files, **{"src/tf/.terraform.lock.hcl": terraform_lockfile}},
)

initialised_files = _do_init_terraform(
initialised_files, initialised_entries = _do_init_terraform(
rule_runner, deployment_with_lockfile, initialise_backend=True
)

Expand All @@ -92,9 +107,9 @@ def test_init_terraform_uses_lockfiles(
), "version in lockfile has changed, we should not have regenerated the lockfile"

# Assert dependencies are initialised to the older version
result_provider = find_file(
initialised_files,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*/terraform-provider-null*",
result_provider = find_link(
initialised_entries,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*",
)
assert result_provider, "Did not find any providers, did we initialise them successfully?"
assert (
Expand All @@ -105,7 +120,7 @@ def test_init_terraform_uses_lockfiles(
def test_init_terraform_without_backends(
rule_runner: RuleRunner, standard_deployment: StandardDeployment
) -> None:
initialised_files = _do_init_terraform(
initialised_files, initialised_entries = _do_init_terraform(
rule_runner, standard_deployment, initialise_backend=False
)

Expand All @@ -115,9 +130,9 @@ def test_init_terraform_without_backends(
), "Terraform state file should not be present if the request was to not initialise the backend"

# The dependencies should still be present
assert find_file(
initialised_files,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*/terraform-provider-null*",
assert find_link(
initialised_entries,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*",
), "Did not find expected provider"


Expand Down Expand Up @@ -157,7 +172,9 @@ def test_init_terraform_with_transitive_module(rule_runner: RuleRunner, tmpdir)
Path(str(tmpdir.mkdir(".terraform").join("state.json"))),
Address("src/tf/deployment", target_name="root"),
)
initialised_files = _do_init_terraform(rule_runner, deployment, initialise_backend=True)
initialised_files, initialised_entries = _do_init_terraform(
rule_runner, deployment, initialise_backend=True
)

assert initialised_files
# Assert that init succeeded and created the modules mapping
Expand All @@ -175,7 +192,7 @@ def test_init_terraform_with_transitive_module(rule_runner: RuleRunner, tmpdir)
)

# Assert that the provider dependency was initialised
assert find_file(
initialised_files,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*/terraform-provider-null*",
assert find_link(
initialised_entries,
".terraform/providers/registry.terraform.io/hashicorp/null/*/*",
), "Did not find expected provider contained in module, did we successfully include it in the files passed to `init`?"
4 changes: 3 additions & 1 deletion src/python/pants/backend/terraform/testutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from pants.core.register import rules as core_rules
from pants.core.util_rules import source_files
from pants.engine import process
from pants.engine.internals.native_engine import Address
from pants.engine.fs import DigestEntries
from pants.engine.internals.native_engine import Address, Digest
from pants.engine.rules import QueryRule
from pants.testutil.rule_runner import RuleRunner

Expand All @@ -50,6 +51,7 @@ def rule_runner_with_auto_approve() -> RuleRunner:
*process.rules(),
QueryRule(DeployProcess, (DeployTerraformFieldSet,)),
QueryRule(TerraformInitResponse, (TerraformInitRequest,)),
QueryRule(DigestEntries, (Digest,)),
],
preserve_tmpdirs=True,
)
Expand Down
55 changes: 53 additions & 2 deletions src/python/pants/backend/terraform/tool.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
"""# Terraform
## Caching: Pants uses the [provider cache](https://developer.hashicorp.com/terraform/cli/config/config-file#provider-plugin-cache) for caching providers.
These are the things that need to be downloaded, so this provides the most speedup.
We use the provider cache instead of identifying and caching the providers individually for a few reasons:
1. This leverages Terraform's existing caching mechanism
2. This is much simpler
3. This incurs almost no overhead, since it is done as part of `terraform init`. We don't need to run more analysers or separately download providers
We didn't use `terraform providers lock` for a few reasons:
1. `terraform providers lock` isn't designed for this use case; it's designed to create mirrors of providers. It does more work (to set up manifests) and would require us to set more config settings
2. `terraform providers lock` doesn't use itself as a cache, so every time we wanted to refresh the cache we would need to download _everything_ again, even if nothing has changed.
"""

from __future__ import annotations

import os
import shlex
from dataclasses import dataclass
from pathlib import Path
Expand All @@ -13,6 +28,7 @@
ExternalToolRequest,
TemplatedExternalTool,
)
from pants.core.util_rules.system_binaries import BinaryShims, BinaryShimsRequest, GetentBinary
from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest
from pants.engine.fs import EMPTY_DIGEST, Digest
from pants.engine.internals.selectors import Get
Expand Down Expand Up @@ -374,6 +390,14 @@ def default_known_versions(cls):
advanced=True,
)

@property
def plugin_cache_dir(self) -> str:
    """Directory name used for Terraform's provider plugin cache.

    The value is a bare directory name, not an absolute path.
    """
    cache_dir_name = "__terraform_filesystem_mirror"
    return cache_dir_name

@property
def append_only_caches(self) -> dict[str, str]:
    """Map the `terraform_plugins` cache name to `plugin_cache_dir`."""
    cache_name = "terraform_plugins"
    return {cache_name: self.plugin_cache_dir}


@dataclass(frozen=True)
class TerraformProcess:
Expand All @@ -389,7 +413,10 @@ class TerraformProcess:

@rule
async def setup_terraform_process(
request: TerraformProcess, terraform: TerraformTool, platform: Platform
request: TerraformProcess,
terraform: TerraformTool,
getent_binary: GetentBinary,
platform: Platform,
) -> Process:
downloaded_terraform = await Get(
DownloadedExternalTool,
Expand All @@ -398,7 +425,30 @@ async def setup_terraform_process(
)
env = await Get(EnvironmentVars, EnvironmentVarsRequest(terraform.extra_env_vars))

immutable_input_digests = {"__terraform": downloaded_terraform.digest}
extra_bins = await Get(
BinaryShims,
BinaryShimsRequest,
BinaryShimsRequest.for_paths(getent_binary, rationale="download terraform providers"),
)

path = []
user_path = env.get("PATH")
if user_path:
path.append(user_path)
path.append(extra_bins.path_component)

env = EnvironmentVars(
{
**env,
"PATH": ":".join(path),
"TF_PLUGIN_CACHE_DIR": (os.path.join("{chroot}", terraform.plugin_cache_dir)),
}
)

immutable_input_digests = {
"__terraform": downloaded_terraform.digest,
**extra_bins.immutable_input_digests,
}

def prepend_paths(paths: Tuple[str, ...]) -> Tuple[str, ...]:
return tuple((Path(request.chdir) / path).as_posix() for path in paths)
Expand All @@ -409,6 +459,7 @@ def prepend_paths(paths: Tuple[str, ...]) -> Tuple[str, ...]:
immutable_input_digests=immutable_input_digests,
output_files=prepend_paths(request.output_files),
output_directories=prepend_paths(request.output_directories),
append_only_caches=terraform.append_only_caches,
env=env,
description=request.description,
level=LogLevel.DEBUG,
Expand Down
14 changes: 14 additions & 0 deletions src/python/pants/core/util_rules/system_binaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,10 @@ class FindBinary(BinaryPath):
pass


class GetentBinary(BinaryPath):
    """A `BinaryPath` subclass that identifies the `getent` binary."""


class GpgBinary(BinaryPath):
pass

Expand Down Expand Up @@ -907,6 +911,16 @@ async def find_git(system_binaries: SystemBinariesSubsystem.EnvironmentAware) ->
return GitBinary(first_path.path, first_path.fingerprint)


@rule(desc="Finding the `getent` binary", level=LogLevel.DEBUG)
async def find_getent(system_binaries: SystemBinariesSubsystem.EnvironmentAware) -> GetentBinary:
    """Look up `getent` on the configured system binary search path.

    The first discovered path is wrapped as a `GetentBinary`.
    NOTE(review): `first_path_or_raise` presumably raises when no binary is
    found -- confirm against `BinaryPaths`.
    """
    getent_request = BinaryPathRequest(
        binary_name="getent", search_path=system_binaries.system_binary_paths
    )
    discovered = await Get(BinaryPaths, BinaryPathRequest, getent_request)
    getent = discovered.first_path_or_raise(getent_request, rationale="getent file")
    return GetentBinary(getent.path, getent.fingerprint)


@rule(desc="Finding the `gpg` binary", level=LogLevel.DEBUG)
async def find_gpg(system_binaries: SystemBinariesSubsystem.EnvironmentAware) -> GpgBinary:
request = BinaryPathRequest(binary_name="gpg", search_path=system_binaries.system_binary_paths)
Expand Down
12 changes: 12 additions & 0 deletions src/python/pants/engine/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,18 @@ def try_decode(content: bytes) -> str:
)
super().__init__("\n".join(err_strings))

@classmethod
def from_result(
    cls, result: FallibleProcessResult, description: str, keep_sandboxes: KeepSandboxes
) -> ProcessExecutionFailure:
    """Alternate constructor that builds the failure from a process result.

    Forwards the result's exit code, stdout and stderr, plus `description`
    and `keep_sandboxes`, to the regular constructor.
    """
    exit_code, stdout, stderr = result.exit_code, result.stdout, result.stderr
    return cls(exit_code, stdout, stderr, description, keep_sandboxes=keep_sandboxes)


@rule
def get_multi_platform_request_description(req: Process) -> ProductDescription:
Expand Down

0 comments on commit 459d39b

Please sign in to comment.