Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional IngestionJob parameters passed by Spark Launcher #1130

Merged
merged 22 commits into from
Nov 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/complete.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: [self-hosted]
strategy:
matrix:
component: [core, serving, jobcontroller, jupyter]
component: [core, serving, jobservice, jupyter]
env:
GITHUB_PR_SHA: ${{ github.event.pull_request.head.sha }}
REGISTRY: gcr.io/kf-feast
Expand Down
9 changes: 1 addition & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,13 @@ build-push-docker:
@$(MAKE) push-core-docker registry=$(REGISTRY) version=$(VERSION)
@$(MAKE) push-serving-docker registry=$(REGISTRY) version=$(VERSION)
@$(MAKE) push-ci-docker registry=$(REGISTRY) version=$(VERSION)
@$(MAKE) push-jobcontroller-docker registry=$(REGISTRY) version=$(VERSION)
@$(MAKE) push-jobservice-docker registry=$(REGISTRY) version=$(VERSION)

build-docker: build-core-docker build-serving-docker build-ci-docker build-jobcontroller-docker build-jobservice-docker
build-docker: build-core-docker build-serving-docker build-ci-docker build-jobservice-docker

push-core-docker:
docker push $(REGISTRY)/feast-core:$(VERSION)

push-jobcontroller-docker:
docker push $(REGISTRY)/feast-jobcontroller:$(VERSION)

push-jobservice-docker:
docker push $(REGISTRY)/feast-jobservice:$(VERSION)

Expand All @@ -150,9 +146,6 @@ build-core-docker:
build-jobservice-docker:
docker build -t $(REGISTRY)/feast-jobservice:$(VERSION) -f infra/docker/jobservice/Dockerfile .

build-jobcontroller-docker:
docker build -t $(REGISTRY)/feast-jobcontroller:$(VERSION) -f infra/docker/jobcontroller/Dockerfile .

build-serving-docker:
docker build -t $(REGISTRY)/feast-serving:$(VERSION) -f infra/docker/serving/Dockerfile .

Expand Down
6 changes: 0 additions & 6 deletions docs/coverage/java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.feast</groupId>
<artifactId>feast-ingestion</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>dev.feast</groupId>
<artifactId>feast-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,40 +37,32 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}

{{- if .Values.gcpServiceAccount.enabled }}
{{- /* Declare one volume per entry in .Values.secrets; each volume is named after, and backed by, that secret. */}}
{{- if .Values.secrets }}
volumes:
- name: {{ template "feast-jobservice.fullname" . }}-gcp-service-account
{{- range $secret := .Values.secrets }}
- name: {{ $secret }}
secret:
secretName: {{ .Values.gcpServiceAccount.existingSecret.name }}
secretName: {{ $secret }}
{{- end }}
{{- end }}

containers:
- name: {{ .Chart.Name }}
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}

{{- if .Values.gcpServiceAccount.enabled }}
{{- if .Values.secrets }}
volumeMounts:
- name: {{ template "feast-jobservice.fullname" . }}-gcp-service-account
mountPath: /etc/secrets/google
{{- /* Mount each secret from .Values.secrets read-only under /etc/secrets/<secret name>. */}}
{{- range $secret := .Values.secrets }}
- name: {{ $secret }}
mountPath: "/etc/secrets/{{ $secret }}"
readOnly: true
{{- end }}
{{- end }}

env:
- name: FEAST_CORE_URL
value: "{{ .Release.Name }}-feast-core:6565"
- name: FEAST_HISTORICAL_SERVING_URL
value: "{{ .Release.Name }}-feast-batch-serving:6566"

{{- if .Values.gcpServiceAccount.enabled }}
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /etc/secrets/google/{{ .Values.gcpServiceAccount.existingSecret.key }}
{{- end }}

{{- if .Values.gcpProjectId }}
- name: GOOGLE_CLOUD_PROJECT
value: {{ .Values.gcpProjectId | quote }}
{{- end }}

{{- range $key, $value := .Values.envOverrides }}
- name: {{ printf "%s" $key | replace "." "_" | upper | quote }}
Expand Down
24 changes: 22 additions & 2 deletions infra/docker-compose/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,30 @@ services:
- /opt/feast/feast-core.jar
- --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml

jobservice:
image: gcr.io/kf-feast/feast-jobservice:${FEAST_VERSION}
depends_on:
- core
ports:
- 6568:6568
volumes:
- $PWD:/shared
environment:
FEAST_CORE_URL: core:6565
FEAST_SERVING_URL: online_serving:6566
FEAST_SPARK_LAUNCHER: standalone
FEAST_SPARK_STANDALONE_MASTER: local
FEAST_SPARK_HOME: /usr/local/spark
FEAST_SPARK_STAGING_LOCATION: file:///shared/staging
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///shared/historical_feature_output
FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT: parquet
FEAST_REDIS_HOST: redis
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a port?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FEAST_REDIS_PORT is a separate option, and its default value (6379) was not changed in this case, so I believe it was omitted to save one line.


jupyter:
image: gcr.io/kf-feast/feast-jupyter:${FEAST_VERSION}
volumes:
- ${GCP_SERVICE_ACCOUNT}:/etc/gcloud/service-accounts/key.json
- $PWD:/shared
depends_on:
- core
environment:
Expand All @@ -30,8 +50,8 @@ services:
FEAST_SPARK_LAUNCHER: standalone
FEAST_SPARK_STANDALONE_MASTER: local
FEAST_SPARK_HOME: /usr/local/spark
FEAST_SPARK_STAGING_LOCATION: file:///tmp/staging
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///tmp/historical_feature_output
FEAST_SPARK_STAGING_LOCATION: file:///shared/staging
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///shared/historical_feature_output
FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT: parquet
FEAST_REDIS_HOST: redis
GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json
Expand Down
4 changes: 2 additions & 2 deletions infra/docker/jobservice/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN make compile-protos-python
# Install Feast SDK
COPY .git .git
COPY README.md README.md
RUN pip install -e sdk/python -U
RUN pip install -U -e sdk/python
RUN pip install "s3fs" "boto3" "urllib3>=1.25.4"

#
Expand All @@ -27,4 +27,4 @@ RUN wget -q https://github.com/grpc-ecosystem/grpc-health-probe/releases/downloa
-O /usr/bin/grpc-health-probe && \
chmod +x /usr/bin/grpc-health-probe

CMD ["feast", "server"]
CMD ["python", "-m", "feast.cli", "server"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for this change?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The container was failing with:

pkg_resources.DistributionNotFound: The 'Click==7.*' distribution was not found and is required by feast

The root cause is unclear; it could be anything, since the image uses a conda environment.

11 changes: 9 additions & 2 deletions infra/scripts/test-docker-compose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ export JUPYTER_DOCKER_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .Network

# Print Jupyter container information
docker inspect feast_jupyter_1
docker logs feast_jupyter_1

# Wait for Jupyter Notebook Container to come online
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${JUPYTER_DOCKER_CONTAINER_IP_ADDRESS}:8888 --timeout=60
Expand All @@ -56,10 +55,18 @@ export FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .N
# Wait for Feast Online Serving to be ready
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS}:6566 --timeout=120


# Get Feast Job Service container IP address
export FEAST_JOB_SERVICE_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' feast_jobservice_1)

# Wait for Feast Job Service to be ready
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_JOB_SERVICE_CONTAINER_IP_ADDRESS}:6568 --timeout=120

# Run e2e tests for Redis
docker exec \
-e FEAST_VERSION=${FEAST_VERSION} \
-e DISABLE_SERVICE_FIXTURES=true \
-e DISABLE_FEAST_SERVICE_FIXTURES=true \
--user root \
feast_jupyter_1 bash \
-c 'cd /feast/tests && python -m pip install -r requirements.txt && pytest e2e/ -m "not bq" --ingestion-jar gs://feast-jobs/spark/ingestion/feast-ingestion-spark-${FEAST_VERSION}.jar --redis-url redis:6379 --core-url core:6565 --serving-url online_serving:6566 --kafka-brokers kafka:9092'
-c 'cd /feast/tests && python -m pip install -r requirements.txt && pytest e2e/ --ingestion-jar https://storage.googleapis.com/feast-jobs/spark/ingestion/feast-ingestion-spark-${FEAST_VERSION}.jar --redis-url redis:6379 --core-url core:6565 --serving-url online_serving:6566 --job-service-url jobservice:6568 --staging-path file:///shared/staging/ --kafka-brokers kafka:9092'
2 changes: 2 additions & 0 deletions infra/scripts/test-end-to-end-gcp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@ python -m pip install -qr tests/requirements.txt
su -p postgres -c "PATH=$PATH HOME=/tmp pytest -v tests/e2e/ \
--feast-version develop --env=gcloud --dataproc-cluster-name feast-e2e \
--dataproc-project kf-feast --dataproc-region us-central1 \
--staging-path gs://feast-templocation-kf-feast/ \
--with-job-service \
--redis-url 10.128.0.105:6379 --redis-cluster --kafka-brokers 10.128.0.103:9094 \
--bq-project kf-feast"
2 changes: 0 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@
<module>datatypes/java</module>
<module>storage/api</module>
<module>storage/connectors</module>
<module>ingestion</module>
<module>core</module>
<module>serving</module>
<module>sdk/java</module>
<module>docs/coverage/java</module>
<module>common</module>
<module>job-controller</module>
<module>common-test</module>
<module>spark/ingestion</module>
</modules>
Expand Down
28 changes: 23 additions & 5 deletions sdk/python/feast/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def version(self):
return result

@property
def project(self) -> Union[str, None]:
def project(self) -> str:
"""
Retrieve currently active project

Expand Down Expand Up @@ -949,6 +949,7 @@ def get_historical_features(
>>> output_file_uri = feature_retrieval_job.get_output_file_uri()
"gs://some-bucket/output/
"""
project = project or FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
feature_tables = self._get_feature_tables_from_feature_refs(
feature_refs, project
)
Expand Down Expand Up @@ -1001,7 +1002,12 @@ def get_historical_features(
)
else:
return start_historical_feature_retrieval_job(
self, entity_source, feature_tables, output_format, output_location,
client=self,
project=self.project,
entity_source=entity_source,
feature_tables=feature_tables,
output_format=output_format,
output_path=output_location,
)

def get_historical_features_df(
Expand Down Expand Up @@ -1043,7 +1049,10 @@ def get_historical_features_df(
feature_refs, project
)
return start_historical_feature_retrieval_spark_session(
self, entity_source, feature_tables
client=self,
project=self.project,
entity_source=entity_source,
feature_tables=feature_tables,
)

def _get_feature_tables_from_feature_refs(
Expand Down Expand Up @@ -1079,7 +1088,13 @@ def start_offline_to_online_ingestion(
:return: Spark Job Proxy object
"""
if not self._use_job_service:
return start_offline_to_online_ingestion(feature_table, start, end, self)
return start_offline_to_online_ingestion(
client=self,
project=self.project,
feature_table=feature_table,
start=start,
end=end,
)
else:
request = StartOfflineToOnlineIngestionJobRequest(
project=self.project, table_name=feature_table.name,
Expand All @@ -1096,7 +1111,10 @@ def start_stream_to_online_ingestion(
) -> SparkJob:
if not self._use_job_service:
return start_stream_to_online_ingestion(
feature_table, extra_jars or [], self
client=self,
project=self.project,
feature_table=feature_table,
extra_jars=extra_jars or [],
)
else:
request = StartStreamToOnlineIngestionJobRequest(
Expand Down
24 changes: 19 additions & 5 deletions sdk/python/feast/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ class AuthProvider(Enum):
CONFIG_REDIS_PORT = "redis_port"
CONFIG_REDIS_SSL = "redis_ssl"

CONFIG_STATSD_ENABLED = "statsd_enabled"
CONFIG_STATSD_HOST = "statsd_host"
CONFIG_STATSD_PORT = "statsd_port"

CONFIG_DEADLETTER_PATH = "deadletter_path"
CONFIG_STENCIL_URL = "stencil_url"

CONFIG_SPARK_EMR_REGION = "emr_region"
CONFIG_SPARK_EMR_CLUSTER_ID = "emr_cluster_id"
CONFIG_SPARK_EMR_CLUSTER_TEMPLATE_PATH = "emr_cluster_template_path"
Expand All @@ -113,10 +120,6 @@ class AuthProvider(Enum):
CONFIG_SERVING_ENABLE_SSL_KEY: "False",
# Path to certificate(s) to secure connection to Feast Serving
CONFIG_SERVING_SERVER_SSL_CERT_KEY: "",
# Enable or disable TLS/SSL to Feast Job Service
CONFIG_JOB_SERVICE_ENABLE_SSL_KEY: "False",
# Path to certificate(s) to secure connection to Feast Job Service
CONFIG_JOB_SERVICE_SERVER_SSL_CERT_KEY: "",
# Default connection timeout to Feast Serving, Feast Core, and Feast Job Service (in seconds)
CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY: "10",
# Default gRPC connection timeout when sending an ApplyFeatureSet command to
Expand All @@ -129,10 +132,21 @@ class AuthProvider(Enum):
# Authentication Provider - Google OpenID/OAuth
CONFIG_AUTH_PROVIDER: "google",
CONFIG_SPARK_LAUNCHER: "dataproc",
CONFIG_SPARK_INGESTION_JOB_JAR: "gs://feast-jobs/spark/ingestion/feast-ingestion-spark-develop.jar",
CONFIG_SPARK_INGESTION_JOB_JAR: "https://storage.googleapis.com/feast-jobs/spark/"
"ingestion/feast-ingestion-spark-develop.jar",
CONFIG_SPARK_STANDALONE_MASTER: "local[*]",
CONFIG_REDIS_HOST: "localhost",
CONFIG_REDIS_PORT: "6379",
CONFIG_REDIS_SSL: "False",
CONFIG_SPARK_HISTORICAL_FEATURE_OUTPUT_FORMAT: "parquet",
# Enable or disable TLS/SSL to Feast Job Service
CONFIG_JOB_SERVICE_ENABLE_SSL_KEY: "False",
# Path to certificate(s) to secure connection to Feast Job Service
CONFIG_JOB_SERVICE_SERVER_SSL_CERT_KEY: "",
CONFIG_STATSD_ENABLED: "False",
# IngestionJob DeadLetter Destination
CONFIG_DEADLETTER_PATH: "",
# ProtoRegistry Address (currently only Stencil Server is supported as registry)
# https://github.com/gojekfarm/stencil
CONFIG_STENCIL_URL: "",
}
Loading