diff --git a/CHANGES.md b/CHANGES.md index 5c7733950aeb..bbf291ed8f41 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -62,8 +62,8 @@ ## New Features / Improvements -* Allow prebuilding large images when using `--prebuild_sdk_container_engine=cloud_build`, like images depending on `tensorflow` or `torch` ([#27023](https://github.com/apache/beam/pull/27023)) -* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Allow prebuilding large images when using `--prebuild_sdk_container_engine=cloud_build`, like images depending on `tensorflow` or `torch` ([#27023](https://github.com/apache/beam/pull/27023)). +* Disabled `pip` cache when installing packages on the workers. This reduces the size of prebuilt Python container images ([#27035](https://github.com/apache/beam/pull/27035)). ## Breaking Changes diff --git a/sdks/python/container/piputil.go b/sdks/python/container/piputil.go index 03ac8325d6d0..a00e017445e3 100644 --- a/sdks/python/container/piputil.go +++ b/sdks/python/container/piputil.go @@ -37,14 +37,14 @@ func pipInstallRequirements(files []string, dir, name string) error { // as possible PyPI downloads. In the first round the --find-links // option will make sure that only things staged in the worker will be // used without following their dependencies. - args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir} + args := []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--no-cache-dir", "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir} if err := execx.Execute("python", args...); err != nil { fmt.Println("Some packages could not be installed solely from the requirements cache. Installing packages from PyPI.") } // The second install round opens up the search for packages on PyPI and // also installs dependencies. The key is that if all the packages have // been installed in the first round then this command will be a no-op. - args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--disable-pip-version-check", "--find-links", dir} + args = []string{"-m", "pip", "install", "-r", filepath.Join(dir, name), "--no-cache-dir", "--disable-pip-version-check", "--find-links", dir} return execx.Execute("python", args...) } } @@ -76,18 +76,18 @@ func pipInstallPackage(files []string, dir, name string, force, optional bool, e // installed version will match the package specified, the package itself // will not be reinstalled, but its dependencies will now be resolved and // installed if necessary. This achieves our goal outlined above. - args := []string{"-m", "pip", "install", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps", + args := []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", "--upgrade", "--force-reinstall", "--no-deps", filepath.Join(dir, packageSpec)} err := execx.Execute("python", args...) if err != nil { return err } - args = []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} + args = []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} return execx.Execute("python", args...) } // Case when we do not perform a forced reinstall. - args := []string{"-m", "pip", "install", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} + args := []string{"-m", "pip", "install", "--no-cache-dir", "--disable-pip-version-check", filepath.Join(dir, packageSpec)} return execx.Execute("python", args...) } }