|
36 | 36 | from pyspark.util import _exception_message |
37 | 37 |
|
38 | 38 |
|
39 | | -def launch_gateway(conf=None): |
| 39 | +def launch_gateway(conf=None, popen_kwargs=None): |
40 | 40 | """ |
41 | 41 | launch jvm gateway |
42 | 42 | :param conf: spark configuration passed to spark-submit |
| 43 | + :param popen_kwargs: Dictionary of kwargs to pass to Popen when spawning |
| 44 | + the py4j JVM. This is a developer feature intended for use in |
| 45 | + customizing how pyspark interacts with the py4j JVM (e.g., capturing |
| 46 | + stdout/stderr). |
43 | 47 | :return: |
44 | 48 | """ |
45 | 49 | if "PYSPARK_GATEWAY_PORT" in os.environ: |
46 | 50 | gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"]) |
47 | 51 | gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"] |
| 52 | + # Process already exists |
| 53 | + proc = None |
48 | 54 | else: |
49 | 55 | SPARK_HOME = _find_spark_home() |
50 | 56 | # Launch the Py4j gateway using Spark's run command so that we pick up the |
@@ -75,15 +81,20 @@ def launch_gateway(conf=None): |
75 | 81 | env["_PYSPARK_DRIVER_CONN_INFO_PATH"] = conn_info_file |
76 | 82 |
|
77 | 83 | # Launch the Java gateway. |
| 84 | + popen_kwargs = {} if popen_kwargs is None else popen_kwargs |
78 | 85 | # We open a pipe to stdin so that the Java gateway can die when the pipe is broken |
| 86 | + popen_kwargs['stdin'] = PIPE |
| 87 | + # We always set the necessary environment variables. |
| 88 | + popen_kwargs['env'] = env |
79 | 89 | if not on_windows: |
80 | 90 | # Don't send ctrl-c / SIGINT to the Java gateway: |
81 | 91 | def preexec_func(): |
82 | 92 | signal.signal(signal.SIGINT, signal.SIG_IGN) |
83 | | - proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env) |
| 93 | + popen_kwargs['preexec_fn'] = preexec_func |
| 94 | + proc = Popen(command, **popen_kwargs) |
84 | 95 | else: |
85 | 96 | # preexec_fn not supported on Windows |
86 | | - proc = Popen(command, stdin=PIPE, env=env) |
| 97 | + proc = Popen(command, **popen_kwargs) |
87 | 98 |
|
88 | 99 | # Wait for the file to appear, or for the process to exit, whichever happens first. |
89 | 100 | while not proc.poll() and not os.path.isfile(conn_info_file): |
@@ -118,6 +129,8 @@ def killChild(): |
118 | 129 | gateway = JavaGateway( |
119 | 130 | gateway_parameters=GatewayParameters(port=gateway_port, auth_token=gateway_secret, |
120 | 131 | auto_convert=True)) |
| 132 | + # Store a reference to the Popen object for use by the caller (e.g., in reading stdout/stderr) |
| 133 | + gateway.proc = proc |
121 | 134 |
|
122 | 135 | # Import the classes used by PySpark |
123 | 136 | java_import(gateway.jvm, "org.apache.spark.SparkConf") |
|
0 commit comments