diff --git a/FeathrSandbox.Dockerfile b/FeathrSandbox.Dockerfile
index 219cf97af..1472ca250 100644
--- a/FeathrSandbox.Dockerfile
+++ b/FeathrSandbox.Dockerfile
@@ -10,8 +10,31 @@
 RUN echo 'REACT_APP_API_ENDPOINT=http://localhost:8000' >> .env.production
 RUN npm install && npm run build
+
+# Stage 1: build the Feathr jars with Gradle
+FROM gradle:7.6.0-jdk8 as gradle-build
+WORKDIR /usr/src/feathr
+
+# for folders, we need to specify the dest folder name
+# COPY feathr-compute/ ./feathr-compute/
+# COPY feathr-config/ ./feathr-config/
+# COPY feathr-data-models/ ./feathr-data-models/
+# COPY feathr-impl/ ./feathr-impl/
+# COPY gradle/ ./gradle/
+# COPY gradle.properties .
+# COPY gradlew .
+# COPY gradlew.bat .
+# COPY repositories.gradle .
+# COPY settings.gradle .
+# COPY ["feathr-compute/", "feathr-config/", "feathr-data-models/", "feathr-impl/", "gradle/", "gradle.properties", "gradlew", "gradlew.bat", "build.gradle", "repositories.gradle", "settings.gradle", "./"]
+COPY . .
+RUN ./gradlew build
+
+
 
 FROM jupyter/pyspark-notebook
 
+
+
 USER root
 
 ## Install dependencies
@@ -53,6 +76,7 @@ USER jovyan
 # UID is like this: uid=1000(jovyan) gid=100(users) groups=100(users)
 COPY --chown=1000:100 ./docs/samples/local_quickstart_notebook.ipynb .
 COPY --chown=1000:100 ./feathr-sandbox/feathr_init_script.py .
+COPY --chown=1000:100 --from=gradle-build /usr/src/feathr/build/libs .
 
 # Run the script so that the maven cache can be populated for a better experience. Otherwise users might have to wait for some time for the maven cache to be ready.
 RUN python feathr_init_script.py
diff --git a/feathr-sandbox/feathr_init_script.py b/feathr-sandbox/feathr_init_script.py
index 3e0d37d2b..dbc0c4c3b 100644
--- a/feathr-sandbox/feathr_init_script.py
+++ b/feathr-sandbox/feathr_init_script.py
@@ -18,7 +18,6 @@
 
 os.environ['SPARK_LOCAL_IP'] = "127.0.0.1"
 os.environ['REDIS_PASSWORD'] = "foobared" # default password for Redis
-
 yaml_config = f"""
 api_version: 1
 project_config:
@@ -30,7 +29,7 @@
     spark_result_output_parts: '1'
   local:
     master: 'local[*]'
-    feathr_runtime_location:
+    feathr_runtime_location: "./feathr_2.12-{feathr.__version__}.jar"
 
 online_store:
   redis:
@@ -44,6 +43,8 @@
   api_endpoint: "http://127.0.0.1:8000/api/v1"
 """
 
+print(yaml_config)
+
 tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
 with open(tmp.name, "w") as text_file:
     text_file.write(yaml_config)
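Note on the config change above: `feathr_runtime_location` now points at the locally built runtime jar, which the sandbox Dockerfile copies out of the `gradle-build` stage into the notebook workdir. The generated YAML is then consumed roughly as follows (a minimal sketch, not part of this diff: the abbreviated config body and the sample `project_name` are illustrative assumptions, the `config_path` keyword follows the usual `FeathrClient` pattern, and the existence check is only for demonstration):

```python
import os
import tempfile

import feathr
from feathr import FeathrClient

# The jar the Dockerfile copies from the gradle-build stage into the workdir.
runtime_jar = f"./feathr_2.12-{feathr.__version__}.jar"
assert os.path.exists(runtime_jar), "runtime jar missing; was it copied from the gradle-build stage?"

# Abbreviated version of the config the init script renders; the real one
# also configures the Redis online store and the registry api_endpoint.
yaml_config = f"""
api_version: 1
project_config:
  project_name: 'feathr_sandbox'
spark_config:
  spark_cluster: 'local'
  local:
    master: 'local[*]'
    feathr_runtime_location: "{runtime_jar}"
"""

tmp = tempfile.NamedTemporaryFile(mode="w", delete=False)
with open(tmp.name, "w") as text_file:
    text_file.write(yaml_config)

# The temp file is handed to the client as its configuration source.
client = FeathrClient(config_path=tmp.name)
```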
diff --git a/feathr_project/feathr/spark_provider/_localspark_submission.py b/feathr_project/feathr/spark_provider/_localspark_submission.py
index e946f636b..012e5523c 100644
--- a/feathr_project/feathr/spark_provider/_localspark_submission.py
+++ b/feathr_project/feathr/spark_provider/_localspark_submission.py
@@ -106,7 +106,23 @@ def submit_feathr_job(
             print(python_files)
             spark_args.append(python_files[0])
         else:
-            spark_args.extend(["--class", main_class_name, main_jar_path])
+            if not python_files:
+                # This is a JAR job.
+                # Azure Synapse/Livy doesn't allow a JAR job to start from Maven directly; a jar file must be uploaded,
+                # so we have to use a dummy jar as the main file.
+                # Use the no-op jar as the main file.
+                # This is a dummy jar which contains only one `org.example.Noop` class with one empty `main` function
+                # which does nothing.
+                main_jar_path = main_jar_path
+                spark_args.extend(["--packages", maven_dependency, "--class", main_class_name, main_jar_path])
+            else:
+                spark_args.extend(["--packages", maven_dependency])
+                # This is a PySpark job, no more things to do.
+                if len(python_files) > 1:
+                    spark_args.extend(["--py-files", ",".join(python_files[1:])])
+                print(python_files)
+                spark_args.append(python_files[0])
+
         if arguments:
             spark_args.extend(arguments)
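Reviewer note on the hunk above: `main_jar_path = main_jar_path` is a no-op as written, while the surrounding comments indicate the intent is to fall back to the bundled no-op jar when no main jar was supplied. A sketch of that intent (the `noop-1.0.jar` filename, its assumed location next to the module, and the standalone helper signature are illustrative assumptions, not part of this diff):

```python
import os
from typing import List, Optional


def extend_submit_args(
    spark_args: List[str],
    main_class_name: str,
    maven_dependency: str,
    main_jar_path: Optional[str] = None,
    python_files: Optional[List[str]] = None,
) -> List[str]:
    # Assumed location of the dummy jar described in the diff's comments:
    # a jar containing only `org.example.Noop` with an empty `main`.
    noop_jar = os.path.join(os.path.dirname(os.path.abspath(__file__)), "noop-1.0.jar")

    if not python_files:
        # JAR job: spark-submit still needs a primary resource even though the
        # real code is resolved from Maven, so fall back to the no-op jar.
        main_jar_path = main_jar_path or noop_jar
        spark_args.extend(["--packages", maven_dependency, "--class", main_class_name, main_jar_path])
    else:
        # PySpark job: resolve the runtime from Maven, ship any extra .py
        # files, and use the first file as the entry point.
        spark_args.extend(["--packages", maven_dependency])
        if len(python_files) > 1:
            spark_args.extend(["--py-files", ",".join(python_files[1:])])
        spark_args.append(python_files[0])
    return spark_args
```

With that fallback in place, a pure-Maven JAR job would yield arguments of the form `--packages <artifact> --class <main class> .../noop-1.0.jar`, satisfying spark-submit's requirement for a primary resource while all real classes come from the Maven package.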