Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve PEX_PATH handling in pex.py #421

Merged
14 changes: 11 additions & 3 deletions pex/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,15 @@ def _activate(self):

# set up the local .pex environment
pex_info = self._pex_info.copy()
pex_info_pex_path = pex_info.pex_path
overrides_pex_path = self._pex_info_overrides.pex_path
pex_info.update(self._pex_info_overrides)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume that you found that this pex_info.update() call does not actually combine env vars + PEX-INFO, but instead clobbers one or the other?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup! Env vars clobber the existing vars set by pex info.

pex_info.pex_path = ":".join(filter(None, [pex_info_pex_path, overrides_pex_path]))
self._envs.append(PEXEnvironment(self._pex, pex_info))

# by this point, pex_info.pex_path will have a single pex path merged from pex_path
# in PEX-INFO and pex_path set in the environment
if pex_info.pex_path:
# set up other environments as specified in PEX_PATH
# set up other environments as specified in pex_path
for pex_path in filter(None, pex_info.pex_path.split(os.pathsep)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about instead of assigning to temp vars and then joining + mutating pex_info.pex_path here, just combining these into a new temporary list and then handling that directly?

also, would it make more sense to use self._vars.PEX_PATH as the source of the env var vs self._pex_info_overrides.pex_path? that's how it worked prior to your last change.

e.g.

# N.B. `PEX_PATH` entries written into `PEX-INFO` take precedence over those set in the environment.
combined_pex_path = self._merge_split(pex_info.pex_path, self._vars.PEX_PATH)
if combined_pex_path:
  for pex_path in combined_pex_path:
    ...

pex_info = PexInfo.from_pex(pex_path)
pex_info.update(self._pex_info_overrides)
Expand Down Expand Up @@ -279,7 +283,7 @@ def patch_all(path, path_importer_cache, modules):
sys.path[:], sys.path_importer_cache.copy(), sys.modules.copy())
new_sys_path, new_sys_path_importer_cache, new_sys_modules = self.minimum_sys(inherit_path)

new_sys_path.extend(filter(None, self._vars.PEX_PATH.split(os.pathsep)))
new_sys_path.extend(self._merge_split(self._pex_info.pex_path, self._vars.PEX_PATH))

patch_all(new_sys_path, new_sys_path_importer_cache, new_sys_modules)
yield
Expand Down Expand Up @@ -490,6 +494,10 @@ def cmdline(self, args=()):
cmds.extend(args)
return cmds

def _merge_split(self, *paths):
merged_paths = ":".join(filter(None, paths))
return filter(None, merged_paths.split(':'))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could reduce this to a one liner list comprehension:

def _merge_split(self, *paths):
  return [p for p in ':'.join(paths).split(':') if p]

with the if p bit doing the same as the filter(None, ...) above.

def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kwargs):
"""Run the PythonEnvironment in an interpreter in a subprocess.

Expand Down
49 changes: 49 additions & 0 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,52 @@ def test_pex_path_arg():
stdout, rc = run_simple_pex(pex_out_path, [test_file_path])
assert rc == 0
assert stdout == b'Success!\n'


def test_pex_path_in_pex_info_and_env():
with temporary_dir() as output_dir:

# create 2 pex files for PEX-INFO pex_path
pex1_path = os.path.join(output_dir, 'pex1.pex')
res1 = run_pex_command(['--disable-cache', 'requests', '-o', pex1_path])
res1.assert_success()
pex2_path = os.path.join(output_dir, 'pex2.pex')
res2 = run_pex_command(['--disable-cache', 'flask', '-o', pex2_path])
res2.assert_success()
pex_path = ':'.join(os.path.join(output_dir, name) for name in ('pex1.pex', 'pex2.pex'))

# create a pex for environment PEX_PATH
pex3_path = os.path.join(output_dir, 'pex3.pex')
res3 = run_pex_command(['--disable-cache', 'sqlalchemy', '-o', pex3_path])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, iirc sqlalchemy is a pretty heavy dep. how about something lighter, like bitarray, enum34, mox etc?

res3.assert_success()
env_pex_path = os.path.join(output_dir, 'pex3.pex')

# parameterize the pex arg for test.py
pex_out_path = os.path.join(output_dir, 'out.pex')
# create test file test.py that attempts to import modules from pex1/pex2
test_file_path = os.path.join(output_dir, 'test.py')
with open(test_file_path, 'w') as fh:
fh.write(dedent('''
import requests
import flask
import sqlalchemy
import sys
import os
import subprocess
print('Success!')
'''))

# build out.pex composed from pex1/pex1
run_pex_command(['--disable-cache',
'--pex-path={}'.format(pex_path),
'wheel',
'-o', pex_out_path])

# load secondary PEX_PATH
env = os.environ.copy()
env['PEX_PATH'] = env_pex_path

# run test.py with composite env
stdout, rc = run_simple_pex(pex_out_path, [test_file_path], env=env)
assert rc == 0
assert stdout == b'Success!\n'