Skip to content

Commit

Permalink
[internal] Don't download Go third-party dependencies multiple times (#…
Browse files Browse the repository at this point in the history
…13352)

Turns out that #13339 didn't actually work - we are redownloading the same modules several times with Go! Downloads happen when:

1. determining `GoModInfo` (once per `go.mod`)
2. `AllDownloadedModules` (once per `go.mod`)
3.  Determining metadata for each third-party package (once per third-party module)
4. Determining metadata for each first-party package (once per first-party package/directory)

This PR fixes it so that we only download modules a single time, once per `go.mod`.

To fix this, we stop treating third-party modules like first-party modules, i.e. we stop `cd`-ing into its downloaded directory and running `go list` directly in it, using its own `go.mod` and `go.sum`. That requires that the chroot has all of the module's transitive dependencies present, and it also resulted in issues like #13138. Instead, the much simpler thing to do is run `go list '...'` to do all third-party analysis in a single swoop. That gets us all the analysis we need.

We also extract the relevant `.go` files from all of the downloaded `GOPATH`, i.e. all the downloaded modules. For compilation, all we need is the `.go` files + the metadata we had earlier calculated. Compilation doesn't need access to anything else like other package's.

For first-party analysis, we copy the whole `GOPATH` into the chroot. (This is really slow! We need something like #12716 to fix this.)

## Benchmark

Running in https://github.com/toolchainlabs/remote-api-tools.

Before:

```
❯ hyperfine -r 5 './pants_from_sources --no-process-execution-local-cache --no-pantsd package ::'
  Time (mean ± σ):     36.467 s ±  0.603 s    [User: 41.109 s, System: 38.095 s]
  Range (min … max):   35.518 s … 37.137 s    5 runs
```

Fixing only third-party analysis:

```
❯ hyperfine -r 5 --show-output './pants_from_sources --no-process-execution-local-cache --no-pantsd package ::'
  Time (mean ± σ):     29.880 s ±  0.901 s    [User: 29.564 s, System: 15.281 s]
  Range (min … max):   28.835 s … 31.312 s    5 runs
```

Fixing everything:

```
❯ hyperfine -r 5 './pants_from_sources --no-process-execution-local-cache --no-pantsd package ::'
  Time (mean ± σ):     26.633 s ±  2.283 s    [User: 24.115 s, System: 30.453 s]
  Range (min … max):   24.570 s … 30.037 s    5 runs
```

[ci skip-rust]
[ci skip-build-wheels]
  • Loading branch information
Eric-Arellano authored Oct 27, 2021
1 parent 710170f commit 07a308e
Show file tree
Hide file tree
Showing 12 changed files with 677 additions and 868 deletions.
35 changes: 8 additions & 27 deletions src/python/pants/backend/go/target_type_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
GoImportPathField,
GoModPackageSourcesField,
GoModTarget,
GoThirdPartyModulePathField,
GoThirdPartyModuleVersionField,
GoThirdPartyPackageDependenciesField,
GoThirdPartyPackageTarget,
)
Expand All @@ -32,8 +30,8 @@
from pants.backend.go.util_rules.go_mod import GoModInfo, GoModInfoRequest
from pants.backend.go.util_rules.import_analysis import GoStdLibImports
from pants.backend.go.util_rules.third_party_pkg import (
ThirdPartyModuleInfo,
ThirdPartyModuleInfoRequest,
AllThirdPartyPackages,
AllThirdPartyPackagesRequest,
ThirdPartyPkgInfo,
ThirdPartyPkgInfoRequest,
)
Expand Down Expand Up @@ -153,12 +151,7 @@ async def inject_go_third_party_package_dependencies(
tgt = wrapped_target.target
pkg_info = await Get(
ThirdPartyPkgInfo,
ThirdPartyPkgInfoRequest(
module_path=tgt[GoThirdPartyModulePathField].value,
version=tgt[GoThirdPartyModuleVersionField].value,
import_path=tgt[GoImportPathField].value,
go_mod_stripped_digest=go_mod_info.stripped_digest,
),
ThirdPartyPkgInfoRequest(tgt[GoImportPathField].value, go_mod_info.stripped_digest),
)

inferred_dependencies = []
Expand Down Expand Up @@ -214,16 +207,9 @@ async def generate_targets_from_go_mod(
request.generator[GoModPackageSourcesField].path_globs(files_not_found_behavior),
),
)
all_module_info = await MultiGet(
Get(
ThirdPartyModuleInfo,
ThirdPartyModuleInfoRequest(
module_path=module_descriptor.path,
version=module_descriptor.version,
go_mod_stripped_digest=go_mod_info.stripped_digest,
),
)
for module_descriptor in go_mod_info.modules
all_third_party_packages = await Get(
AllThirdPartyPackages,
AllThirdPartyPackagesRequest(go_mod_info.stripped_digest),
)

dir_to_filenames = group_by_dir(go_paths.files)
Expand Down Expand Up @@ -251,11 +237,7 @@ def create_first_party_package_tgt(dir: str) -> GoFirstPartyPackageTarget:

def create_third_party_package_tgt(pkg_info: ThirdPartyPkgInfo) -> GoThirdPartyPackageTarget:
return GoThirdPartyPackageTarget(
{
GoThirdPartyModulePathField.alias: pkg_info.module_path,
GoThirdPartyModuleVersionField.alias: pkg_info.version,
GoImportPathField.alias: pkg_info.import_path,
},
{GoImportPathField.alias: pkg_info.import_path},
# E.g. `src/go:mod#github.com/google/uuid`.
generator_addr.create_generated(pkg_info.import_path),
union_membership,
Expand All @@ -264,8 +246,7 @@ def create_third_party_package_tgt(pkg_info: ThirdPartyPkgInfo) -> GoThirdPartyP

third_party_pkgs = (
create_third_party_package_tgt(pkg_info)
for module_info in all_module_info
for pkg_info in module_info.values()
for pkg_info in all_third_party_packages.import_paths_to_pkg_info.values()
)
return GeneratedTargets(request.generator, (*first_party_pkgs, *third_party_pkgs))

Expand Down
90 changes: 27 additions & 63 deletions src/python/pants/backend/go/target_type_rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
GoFirstPartyPackageTarget,
GoImportPathField,
GoModTarget,
GoThirdPartyModulePathField,
GoThirdPartyModuleVersionField,
GoThirdPartyPackageTarget,
)
from pants.backend.go.util_rules import first_party_pkg, go_mod, sdk, third_party_pkg
Expand Down Expand Up @@ -79,23 +77,21 @@ def test_go_package_dependency_inference(rule_runner: RuleRunner) -> None:
module go.example.com/foo
go 1.17
require github.com/google/go-cmp v0.4.0
require github.com/google/uuid v1.3.0
"""
),
"foo/go.sum": dedent(
"""\
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
"""
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
"""
),
"foo/pkg/foo.go": dedent(
"""\
package pkg
import "github.com/google/go-cmp/cmp"
func grok(left, right string) bool {
return cmp.Equal(left, right)
import "github.com/google/uuid"
func Grok() string {
return uuid.Foo()
}
"""
),
Expand Down Expand Up @@ -126,7 +122,7 @@ def test_go_package_dependency_inference(rule_runner: RuleRunner) -> None:
[InferGoPackageDependenciesRequest(tgt2[GoFirstPartyPackageSourcesField])],
)
assert inferred_deps2.dependencies == FrozenOrderedSet(
[Address("foo", generated_name="github.com/google/go-cmp/cmp")]
[Address("foo", generated_name="github.com/google/uuid")]
)

# Compilation failures should not blow up Pants.
Expand Down Expand Up @@ -154,6 +150,7 @@ def test_generate_package_targets(rule_runner: RuleRunner) -> None:
require (
github.com/google/go-cmp v0.4.0
github.com/google/uuid v1.2.0
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
)
"""
),
Expand Down Expand Up @@ -189,15 +186,9 @@ def gen_first_party_tgt(rel_dir: str, sources: list[str]) -> GoFirstPartyPackage
residence_dir=os.path.join("src/go", rel_dir).rstrip("/"),
)

def gen_third_party_tgt(
mod_path: str, version: str, import_path: str
) -> GoThirdPartyPackageTarget:
def gen_third_party_tgt(import_path: str) -> GoThirdPartyPackageTarget:
return GoThirdPartyPackageTarget(
{
GoImportPathField.alias: import_path,
GoThirdPartyModulePathField.alias: mod_path,
GoThirdPartyModuleVersionField.alias: version,
},
{GoImportPathField.alias: import_path},
Address("src/go", generated_name=import_path),
)

Expand All @@ -207,44 +198,21 @@ def gen_third_party_tgt(
gen_first_party_tgt("", ["hello.go"]),
gen_first_party_tgt("subdir", ["subdir/f.go", "subdir/f2.go"]),
gen_first_party_tgt("another_dir/subdir", ["another_dir/subdir/f.go"]),
gen_third_party_tgt("github.com/google/uuid", "v1.2.0", "github.com/google/uuid"),
gen_third_party_tgt(
"github.com/google/go-cmp", "v0.4.0", "github.com/google/go-cmp/cmp"
),
gen_third_party_tgt(
"github.com/google/go-cmp", "v0.4.0", "github.com/google/go-cmp/cmp/cmpopts"
),
gen_third_party_tgt(
"github.com/google/go-cmp", "v0.4.0", "github.com/google/go-cmp/cmp/internal/diff"
),
gen_third_party_tgt(
"github.com/google/go-cmp", "v0.4.0", "github.com/google/go-cmp/cmp/internal/flags"
),
gen_third_party_tgt(
"github.com/google/go-cmp",
"v0.4.0",
"github.com/google/go-cmp/cmp/internal/function",
),
gen_third_party_tgt(
"github.com/google/go-cmp",
"v0.4.0",
"github.com/google/go-cmp/cmp/internal/testprotos",
),
gen_third_party_tgt(
"github.com/google/go-cmp",
"v0.4.0",
"github.com/google/go-cmp/cmp/internal/teststructs",
),
gen_third_party_tgt(
"github.com/google/go-cmp", "v0.4.0", "github.com/google/go-cmp/cmp/internal/value"
),
gen_third_party_tgt(
"golang.org/x/xerrors", "v0.0.0-20191204190536-9bdfabe68543", "golang.org/x/xerrors"
),
gen_third_party_tgt(
"golang.org/x/xerrors",
"v0.0.0-20191204190536-9bdfabe68543",
"golang.org/x/xerrors/internal",
*(
gen_third_party_tgt(pkg)
for pkg in (
"github.com/google/uuid",
"github.com/google/go-cmp/cmp",
"github.com/google/go-cmp/cmp/cmpopts",
"github.com/google/go-cmp/cmp/internal/diff",
"github.com/google/go-cmp/cmp/internal/flags",
"github.com/google/go-cmp/cmp/internal/function",
"github.com/google/go-cmp/cmp/internal/testprotos",
"github.com/google/go-cmp/cmp/internal/teststructs",
"github.com/google/go-cmp/cmp/internal/value",
"golang.org/x/xerrors",
"golang.org/x/xerrors/internal",
)
),
},
)
Expand All @@ -261,11 +229,7 @@ def test_package_targets_cannot_be_manually_created() -> None:
)
with pytest.raises(InvalidTargetException):
GoThirdPartyPackageTarget(
{
GoImportPathField.alias: "foo",
GoThirdPartyModulePathField.alias: "foo",
GoThirdPartyModuleVersionField.alias: "foo",
},
{GoImportPathField.alias: "foo"},
Address("foo"),
)

Expand Down
29 changes: 1 addition & 28 deletions src/python/pants/backend/go/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,36 +188,9 @@ class GoThirdPartyPackageDependenciesField(Dependencies):
pass


class GoThirdPartyModulePathField(StringField):
alias = "module_path"
help = (
"The module path of the third-party module this package comes from, "
"e.g. `github.com/google/go-cmp`.\n\n"
"This field should not be overridden; use the value from target generation."
)
required = True
value: str


class GoThirdPartyModuleVersionField(StringField):
alias = "version"
help = (
"The version of the third-party module this package comes from, e.g. `v0.4.0`.\n\n"
"This field should not be overridden; use the value from target generation."
)
required = True
value: str


class GoThirdPartyPackageTarget(Target):
alias = "go_third_party_package"
core_fields = (
*COMMON_TARGET_FIELDS,
GoThirdPartyPackageDependenciesField,
GoThirdPartyModulePathField,
GoThirdPartyModuleVersionField,
GoImportPathField,
)
core_fields = (*COMMON_TARGET_FIELDS, GoThirdPartyPackageDependenciesField, GoImportPathField)
help = (
"A package from a third-party Go module.\n\n"
"You should not explicitly create this target in BUILD files. Instead, add a `go_mod` "
Expand Down
17 changes: 5 additions & 12 deletions src/python/pants/backend/go/util_rules/build_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
GoFirstPartyPackageSourcesField,
GoFirstPartyPackageSubpathField,
GoImportPathField,
GoThirdPartyModulePathField,
GoThirdPartyModuleVersionField,
GoThirdPartyPackageDependenciesField,
)
from pants.backend.go.util_rules.assembly import (
AssemblyPostCompilation,
Expand All @@ -32,7 +31,6 @@
from pants.engine.process import FallibleProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import Dependencies, DependenciesRequest, UnexpandedTargets, WrappedTarget
from pants.util.dirutil import fast_relpath
from pants.util.frozendict import FrozenDict
from pants.util.strutil import path_safe

Expand Down Expand Up @@ -262,22 +260,17 @@ async def setup_build_go_package_target_request(
go_file_names += _first_party_pkg_info.test_files
s_file_names = _first_party_pkg_info.s_files

elif target.has_field(GoThirdPartyModulePathField):
_module_path = target[GoThirdPartyModulePathField].value
subpath = fast_relpath(import_path, _module_path)

elif target.has_field(GoThirdPartyPackageDependenciesField):
_go_mod_address = target.address.maybe_convert_to_target_generator()
_go_mod_info = await Get(GoModInfo, GoModInfoRequest(_go_mod_address))
_third_party_pkg_info = await Get(
ThirdPartyPkgInfo,
ThirdPartyPkgInfoRequest(
import_path=import_path,
module_path=_module_path,
version=target[GoThirdPartyModuleVersionField].value,
go_mod_stripped_digest=_go_mod_info.stripped_digest,
import_path=import_path, go_mod_stripped_digest=_go_mod_info.stripped_digest
),
)

subpath = _third_party_pkg_info.subpath
digest = _third_party_pkg_info.digest
go_file_names = _third_party_pkg_info.go_files
s_file_names = _third_party_pkg_info.s_files
Expand All @@ -297,7 +290,7 @@ async def setup_build_go_package_target_request(
for tgt in all_deps
if (
tgt.has_field(GoFirstPartyPackageSourcesField)
or tgt.has_field(GoThirdPartyModulePathField)
or tgt.has_field(GoThirdPartyPackageDependenciesField)
)
)
direct_dependencies = []
Expand Down
6 changes: 3 additions & 3 deletions src/python/pants/backend/go/util_rules/build_pkg_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def test_build_third_party_pkg_target(rule_runner: RuleRunner) -> None:
rule_runner,
Address("", target_name="mod", generated_name=import_path),
expected_import_path=import_path,
expected_subpath="",
expected_subpath="github.com/google/uuid@v1.3.0",
expected_go_file_names=[
"dce.go",
"doc.go",
Expand Down Expand Up @@ -378,7 +378,7 @@ def test_build_target_with_dependencies(rule_runner: RuleRunner) -> None:
rule_runner,
Address("", target_name="mod", generated_name=xerrors_internal_import_path),
expected_import_path=xerrors_internal_import_path,
expected_subpath="internal",
expected_subpath="golang.org/x/xerrors@v0.0.0-20191204190536-9bdfabe68543/internal",
expected_go_file_names=["internal.go"],
expected_direct_dependency_import_paths=[],
expected_transitive_dependency_import_paths=[],
Expand All @@ -389,7 +389,7 @@ def test_build_target_with_dependencies(rule_runner: RuleRunner) -> None:
rule_runner,
Address("", target_name="mod", generated_name=xerrors_import_path),
expected_import_path=xerrors_import_path,
expected_subpath="",
expected_subpath="golang.org/x/xerrors@v0.0.0-20191204190536-9bdfabe68543",
expected_go_file_names=[
"adaptor.go",
"doc.go",
Expand Down
16 changes: 12 additions & 4 deletions src/python/pants/backend/go/util_rules/first_party_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
)
from pants.backend.go.util_rules.go_mod import GoModInfo, GoModInfoRequest
from pants.backend.go.util_rules.sdk import GoSdkProcess
from pants.backend.go.util_rules.third_party_pkg import (
AllThirdPartyPackages,
AllThirdPartyPackagesRequest,
)
from pants.build_graph.address import Address
from pants.engine.engine_aware import EngineAwareParameter
from pants.engine.fs import Digest, MergeDigests
Expand Down Expand Up @@ -80,11 +84,15 @@ async def compute_first_party_package_info(
import_path = target[GoImportPathField].value
subpath = target[GoFirstPartyPackageSubpathField].value

pkg_sources = await Get(
HydratedSources, HydrateSourcesRequest(target[GoFirstPartyPackageSourcesField])
pkg_sources, all_third_party_packages = await MultiGet(
Get(HydratedSources, HydrateSourcesRequest(target[GoFirstPartyPackageSourcesField])),
Get(AllThirdPartyPackages, AllThirdPartyPackagesRequest(go_mod_info.stripped_digest)),
)
input_digest = await Get(
Digest, MergeDigests([pkg_sources.snapshot.digest, go_mod_info.digest])
Digest,
MergeDigests(
[pkg_sources.snapshot.digest, go_mod_info.digest, all_third_party_packages.digest]
),
)

result = await Get(
Expand All @@ -93,7 +101,7 @@ async def compute_first_party_package_info(
input_digest=input_digest,
command=("list", "-json", f"./{subpath}"),
description=f"Determine metadata for {request.address}",
working_dir=request.address.spec_path,
working_dir=request.address.spec_path, # i.e. the `go.mod`'s directory.
),
)
if result.exit_code != 0:
Expand Down
Loading

0 comments on commit 07a308e

Please sign in to comment.