Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #7858: Moved ingestion related make commands into Makefile in ingestion directory #13677

Merged
merged 9 commits into from
Oct 30, 2023
105 changes: 3 additions & 102 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,118 +1,20 @@
.DEFAULT_GOAL := help
PY_SOURCE ?= ingestion/src
include ingestion/Makefile

.PHONY: help
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[35m%-30s\033[0m %s\n", $$1, $$2}'

.PHONY: install
install: ## Install the ingestion module to the current environment
python -m pip install ingestion/

.PHONY: install_apis
install_apis: ## Install the REST APIs module to the current environment
python -m pip install openmetadata-airflow-apis/

.PHONY: install_test
install_test: ## Install the ingestion module with test dependencies
python -m pip install "ingestion[test]/"

.PHONY: install_e2e_tests
install_e2e_tests: ## Install the ingestion module with e2e test dependencies (playwright)
python -m pip install "ingestion[e2e_test]/"
playwright install --with-deps

.PHONY: install_dev
install_dev: ## Install the ingestion module with dev dependencies
python -m pip install "ingestion[dev]/"

.PHONY: install_all
install_all: ## Install the ingestion module with all dependencies
python -m pip install "ingestion[all]/"

.PHONY: precommit_install
precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml
@echo "Installing pre-commit hooks"
@echo "Make sure to first run install_test first"
pre-commit install

.PHONY: lint
lint: ## Run pylint on the Python sources to analyze the codebase
PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint

.PHONY: py_format
py_format: ## Run black and isort to format the Python codebase
pycln ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated --all
isort ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/env --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3
black ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated

.PHONY: py_format_check
py_format_check: ## Check if Python sources are correctly formatted
pycln ingestion/ openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated --all
isort --check-only ingestion/ openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip ingestion/build --skip openmetadata-airflow-apis/build --profile black --multi-line 3
black --check --diff ingestion/ openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
PYTHONPATH="${PYTHONPATH}:./ingestion/plugins" pylint --fail-under=10 $(PY_SOURCE)/metadata --ignore-paths $(PY_SOURCE)/metadata/generated || (echo "PyLint error code $$?"; exit 1)

## Ingestion models generation
.PHONY: generate
generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module
@echo "Running Datamodel Code Generator"
@echo "Make sure to first run the install_dev recipe"
rm -rf ingestion/src/metadata/generated
mkdir -p ingestion/src/metadata/generated
python scripts/datamodel_generation.py
$(MAKE) py_antlr js_antlr
$(MAKE) install

## Ingestion tests & QA
.PHONY: run_ometa_integration_tests
run_ometa_integration_tests: ## Run Python integration tests
coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-integration.xml ingestion/tests/integration/ometa ingestion/tests/integration/orm_profiler ingestion/tests/integration/test_suite ingestion/tests/integration/data_insight ingestion/tests/integration/lineage

.PHONY: unit_ingestion
unit_ingestion: ## Run Python unit tests
coverage run --rcfile ingestion/.coveragerc -a --branch -m pytest -c ingestion/setup.cfg --junitxml=ingestion/junit/test-results-unit.xml --ignore=ingestion/tests/unit/source ingestion/tests/unit

.PHONY: run_e2e_tests
run_e2e_tests: ## Run e2e tests
pytest --screenshot=only-on-failure --output="ingestion/tests/e2e/artifacts" $(ARGS) --junitxml=ingestion/junit/test-results-e2e.xml ingestion/tests/e2e

.PHONY: run_python_tests
run_python_tests: ## Run all Python tests with coverage
coverage erase
$(MAKE) unit_ingestion
$(MAKE) run_ometa_integration_tests
coverage report --rcfile ingestion/.coveragerc || true

.PHONY: coverage
coverage: ## Run all Python tests and generate the coverage XML report
$(MAKE) run_python_tests
coverage xml --rcfile ingestion/.coveragerc -o ingestion/coverage.xml || true
sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" ingestion/coverage.xml >> ingestion/ci-coverage.xml

.PHONY: sonar_ingestion
sonar_ingestion: ## Run the Sonar analysis based on the tests results and push it to SonarCloud
docker run \
--rm \
-e SONAR_HOST_URL="https://sonarcloud.io" \
-e SONAR_SCANNER_OPTS="-Xmx1g" \
-e SONAR_LOGIN=$(token) \
-v ${PWD}/ingestion:/usr/src \
sonarsource/sonar-scanner-cli \
-Dproject.settings=sonar-project.properties

.PHONY: run_apis_tests
run_apis_tests: ## Run the openmetadata airflow apis tests
coverage erase
coverage run --rcfile openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=openmetadata-airflow-apis/junit/test-results.xml openmetadata-airflow-apis/tests
coverage report --rcfile openmetadata-airflow-apis/.coveragerc

.PHONY: coverage_apis
coverage_apis: ## Run the python tests on openmetadata-airflow-apis
$(MAKE) run_apis_tests
coverage xml --rcfile openmetadata-airflow-apis/.coveragerc -o openmetadata-airflow-apis/coverage.xml
sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g" openmetadata-airflow-apis/coverage.xml >> openmetadata-airflow-apis/ci-coverage.xml

## Yarn
.PHONY: yarn_install_cache
yarn_install_cache: ## Use Yarn to install UI dependencies
Expand Down Expand Up @@ -158,12 +60,11 @@ core_py_antlr: ## Generate the Python core code for parsing FQNs under ingestio

.PHONY: py_antlr
py_antlr: ## Generate the Python code for parsing FQNs
antlr4 -Dlanguage=Python3 -o ingestion/src/metadata/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4
antlr4 -Dlanguage=Python3 -o ingestion/src/metadata/generated/antlr openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4

.PHONY: js_antlr
js_antlr: ## Generate the Python code for parsing FQNs
antlr4 -Dlanguage=JavaScript -o openmetadata-ui/src/main/resources/ui/src/generated/antlr ${PWD}/openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4

antlr4 -Dlanguage=JavaScript -o openmetadata-ui/src/main/resources/ui/src/generated/antlr openmetadata-spec/src/main/antlr4/org/openmetadata/schema/*.g4

.PHONY: install_antlr_cli
install_antlr_cli: ## Install antlr CLI locally
Expand Down
110 changes: 110 additions & 0 deletions ingestion/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Detect whether make is running from the repository root (where this file is
# `include`d by the top-level Makefile) or from inside ingestion/ itself, and
# derive the relative paths to the ingestion module and the repo root.
DIRECTORY_NAME := $(notdir $(CURDIR))
# Default Python source tree. The root Makefile may have already set PY_SOURCE
# (to ingestion/src) before including this file; ?= leaves that value intact.
PY_SOURCE ?= ./src

ifeq (ingestion,$(DIRECTORY_NAME))
INGESTION_DIR := .
ROOT_DIR := ..
else
INGESTION_DIR := ingestion
ROOT_DIR := .
endif

.PHONY: install
install: ## Install the ingestion module to the current environment
	python -m pip install $(INGESTION_DIR)/

# NOTE(review): the extras suffix is spliced before the trailing slash
# (e.g. "ingestion[dev]/"); confirm this form is accepted by the pip
# versions used locally and in CI.
.PHONY: install_dev
install_dev: ## Install the ingestion module with dev dependencies
	python -m pip install "$(INGESTION_DIR)[dev]/"

.PHONY: install_test
install_test: ## Install the ingestion module with test dependencies
	python -m pip install "$(INGESTION_DIR)[test]/"

.PHONY: install_all
install_all: ## Install the ingestion module with all dependencies
	python -m pip install "$(INGESTION_DIR)[all]/"

.PHONY: install_apis
install_apis: ## Install the REST APIs module to the current environment
	# FIX: $(ROOT_DIR) is "." or ".." with no trailing slash, so a "/" separator
	# is required here; without it the path expanded to ".openmetadata-airflow-apis/"
	# or "..openmetadata-airflow-apis/", which does not exist.
	python -m pip install $(ROOT_DIR)/openmetadata-airflow-apis/

.PHONY: lint
# Prepends the ingestion plugins directory to PYTHONPATH so pylint can resolve
# plugin imports; the auto-generated $(PY_SOURCE)/metadata/generated tree is
# pruned from the file list before piping to pylint.
lint: ## Run pylint on the Python sources to analyze the codebase
	PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" find $(PY_SOURCE) -path $(PY_SOURCE)/metadata/generated -prune -false -o -type f -name "*.py" | xargs pylint

.PHONY: precommit_install
precommit_install: ## Install the project's precommit hooks from .pre-commit-config.yaml
	@echo "Installing pre-commit hooks"
	# FIX: message previously read "Make sure to first run install_test first"
	# (duplicated "first").
	@echo "Make sure to run install_test first"
	pre-commit install

.PHONY: py_format
# Formats both the ingestion module and the airflow APIs module with pycln,
# isort and black; every tool excludes the auto-generated
# $(PY_SOURCE)/metadata/generated tree, plus env/build directories for isort.
py_format: ## Run black and isort to format the Python codebase
	pycln $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated --all
	isort $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip $(INGESTION_DIR)/env --skip $(INGESTION_DIR)/build --skip $(ROOT_DIR)/openmetadata-airflow-apis/build --profile black --multi-line 3
	black $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated

.PHONY: py_format_check
# Check-only counterpart of py_format (pycln --diff, isort --check-only,
# black --check). Additionally runs pylint with --fail-under=10; on failure the
# || group echoes pylint's exit status ($$? inside the subshell) and fails the
# target explicitly.
py_format_check: ## Check if Python sources are correctly formatted
	pycln $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --diff --extend-exclude $(PY_SOURCE)/metadata/generated --all
	isort --check-only $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --skip $(PY_SOURCE)/metadata/generated --skip $(INGESTION_DIR)/build --skip $(ROOT_DIR)/openmetadata-airflow-apis/build --profile black --multi-line 3
	black --check --diff $(INGESTION_DIR)/ $(ROOT_DIR)/openmetadata-airflow-apis/ --extend-exclude $(PY_SOURCE)/metadata/generated
	PYTHONPATH="${PYTHONPATH}:$(INGESTION_DIR)/plugins" pylint --fail-under=10 $(PY_SOURCE)/metadata --ignore-paths $(PY_SOURCE)/metadata/generated || (echo "PyLint error code $$?"; exit 1)

.PHONY: unit_ingestion
# coverage -a appends to the existing data file so unit and integration runs
# accumulate into one report; tests/unit/source is excluded from the run.
unit_ingestion: ## Run Python unit tests
	coverage run --rcfile $(INGESTION_DIR)/.coveragerc -a --branch -m pytest -c $(INGESTION_DIR)/setup.cfg --junitxml=$(INGESTION_DIR)/junit/test-results-unit.xml --ignore=$(INGESTION_DIR)/tests/unit/source $(INGESTION_DIR)/tests/unit

## Ingestion tests & QA
.PHONY: run_ometa_integration_tests
run_ometa_integration_tests: ## Run Python integration tests
	# FIX: the orm_profiler suite was hard-coded as "ingestion/tests/…", which
	# only resolves when run from the repo root; use $(INGESTION_DIR) like the
	# other suites so the target also works from inside ingestion/.
	coverage run --rcfile $(INGESTION_DIR)/.coveragerc -a --branch -m pytest -c $(INGESTION_DIR)/setup.cfg --junitxml=$(INGESTION_DIR)/junit/test-results-integration.xml $(INGESTION_DIR)/tests/integration/ometa $(INGESTION_DIR)/tests/integration/orm_profiler $(INGESTION_DIR)/tests/integration/test_suite $(INGESTION_DIR)/tests/integration/data_insight $(INGESTION_DIR)/tests/integration/lineage

.PHONY: run_python_tests
# Erases prior coverage data, then runs the unit and integration suites so
# their -a (append) runs accumulate into one data set. The final report is
# best-effort (|| true) so a reporting failure does not fail the target.
run_python_tests: ## Run all Python tests with coverage
	coverage erase
	$(MAKE) unit_ingestion
	$(MAKE) run_ometa_integration_tests
	coverage report --rcfile $(INGESTION_DIR)/.coveragerc || true

.PHONY: sonar_ingestion
# Expects the SonarCloud token to be passed on the command line, e.g.
# `make sonar_ingestion token=…` — $(token) is empty otherwise. Mounts the
# ingestion sources into the scanner container at /usr/src.
sonar_ingestion: ## Run the Sonar analysis based on the tests results and push it to SonarCloud
	docker run \
		--rm \
		-e SONAR_HOST_URL="https://sonarcloud.io" \
		-e SONAR_SCANNER_OPTS="-Xmx1g" \
		-e SONAR_LOGIN=$(token) \
		-v ${PWD}/$(INGESTION_DIR):/usr/src \
		sonarsource/sonar-scanner-cli \
		-Dproject.settings=sonar-project.properties

.PHONY: run_apis_tests
# Runs the airflow-apis test suite under coverage; unlike run_python_tests this
# target erases and reports within the same recipe.
run_apis_tests: ## Run the openmetadata airflow apis tests
	coverage erase
	coverage run --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc -a --branch -m pytest --junitxml=$(ROOT_DIR)/openmetadata-airflow-apis/junit/test-results.xml $(ROOT_DIR)/openmetadata-airflow-apis/tests
	coverage report --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc


.PHONY: coverage_apis
coverage_apis: ## Run the python tests on openmetadata-airflow-apis
	$(MAKE) run_apis_tests
	coverage xml --rcfile $(ROOT_DIR)/openmetadata-airflow-apis/.coveragerc -o $(ROOT_DIR)/openmetadata-airflow-apis/coverage.xml
	# FIX: the sed input/output previously used $(INGESTION_DIR)/openmetadata-airflow-apis/…,
	# a path that exists from neither invocation directory — the APIs module lives
	# under $(ROOT_DIR), as the two lines above (and run_apis_tests) already assume.
	# The sed strips the site-packages prefix from paths in the coverage XML for CI.
	sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")\///g" $(ROOT_DIR)/openmetadata-airflow-apis/coverage.xml >> $(ROOT_DIR)/openmetadata-airflow-apis/ci-coverage.xml

## Ingestion models generation
.PHONY: generate
# Regenerates the pydantic models from the JSON Schemas, then regenerates the
# ANTLR parsers and reinstalls the ingestion package so the new models are
# importable.
generate: ## Generate the pydantic models from the JSON Schemas to the ingestion module
	@echo "Running Datamodel Code Generator"
	@echo "Make sure to first run the install_dev recipe"
	rm -rf $(INGESTION_DIR)/src/metadata/generated
	mkdir -p $(INGESTION_DIR)/src/metadata/generated
	python $(ROOT_DIR)/scripts/datamodel_generation.py
	# py_antlr and js_antlr are defined in the root Makefile, hence -C $(ROOT_DIR).
	$(MAKE) -C $(ROOT_DIR) py_antlr js_antlr
	$(MAKE) install

.PHONY: coverage
# Produces coverage.xml and then rewrites installed-package paths back to src/
# via sed (so CI tooling can map coverage onto the repository layout); the
# result is appended (>>) to ci-coverage.xml. coverage xml is best-effort
# (|| true) so partial data still yields a report.
coverage: ## Run all Python tests and generate the coverage XML report
	$(MAKE) run_python_tests
	coverage xml --rcfile $(INGESTION_DIR)/.coveragerc -o $(INGESTION_DIR)/coverage.xml || true
	sed -e "s/$(shell python -c "import site; import os; from pathlib import Path; print(os.path.relpath(site.getsitepackages()[0], str(Path.cwd())).replace('/','\/'))")/src/g" $(INGESTION_DIR)/coverage.xml >> $(INGESTION_DIR)/ci-coverage.xml
12 changes: 6 additions & 6 deletions ingestion/src/metadata/antlr/split_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ class FqnSplitListener(FqnListener):
def __init__(self):
self._list = []

def enterQuotedName(self, ctx: FqnParser.QuotedNameContext):
def enter_quoted_name(self, ctx: FqnParser.QuotedNameContext):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we'll need to double-check why the server is unable to start. I am not sure whether we might be overlooking something by updating these method names, since they are related to ANTLR.

I'd suggest reverting these changes and seeing if the py-tests pass properly, since the major topic in the GitHub issue was about cleaning up the Makefile (which is beautifully done here already).

Thanks

self._list.append(ctx.getText())

def enterUnquotedName(self, ctx: FqnParser.UnquotedNameContext):
def enter_unquoted_name(self, ctx: FqnParser.UnquotedNameContext):
self._list.append(ctx.getText())

def split(self):
Expand All @@ -35,16 +35,16 @@ class EntityLinkSplitListener(EntityLinkListener):
def __init__(self):
self._list = []

def enterEntityAttribute(self, ctx: EntityLinkParser.EntityAttributeContext):
def enter_entity_attribute(self, ctx: EntityLinkParser.EntityAttributeContext):
self._list.append(ctx.getText())

def enterEntityType(self, ctx: EntityLinkParser.EntityTypeContext):
def enter_entity_type(self, ctx: EntityLinkParser.EntityTypeContext):
self._list.append(ctx.getText())

def enterEntityField(self, ctx: EntityLinkParser.EntityFieldContext):
def enter_entity_field(self, ctx: EntityLinkParser.EntityFieldContext):
self._list.append(ctx.getText())

def enterEntityFqn(self, ctx: EntityLinkParser.EntityFqnContext):
def enter_entity_fqn(self, ctx: EntityLinkParser.EntityFqnContext):
self._list.append(ctx.getText())

def split(self):
Expand Down
26 changes: 16 additions & 10 deletions scripts/datamodel_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,30 @@

import datamodel_code_generator.model.pydantic
from datamodel_code_generator.imports import Import
import os


UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [
"ingestion/src/metadata/generated/schema/entity/classification/tag.py",
"ingestion/src/metadata/generated/schema/entity/events/webhook.py",
"ingestion/src/metadata/generated/schema/entity/teams/user.py",
"ingestion/src/metadata/generated/schema/entity/type.py",
"ingestion/src/metadata/generated/schema/type/basic.py",
]

datamodel_code_generator.model.pydantic.types.IMPORT_SECRET_STR = Import.from_full_path(
"metadata.ingestion.models.custom_pydantic.CustomSecretStr"
)

from datamodel_code_generator.__main__ import main

args = "--input openmetadata-spec/src/main/resources/json/schema --input-file-type jsonschema --output ingestion/src/metadata/generated/schema --set-default-enum-member".split(
" "
)
# Resolve paths relative to the invocation directory: the script may be run
# from the repository root or from inside ingestion/ (via ingestion/Makefile).
current_directory = os.getcwd()
# NOTE(review): endswith("/ingestion") assumes POSIX path separators and that
# the cwd is exactly the ingestion directory — confirm behavior on Windows.
ingestion_path = "./" if current_directory.endswith("/ingestion") else "ingestion/"
directory_root = "../" if current_directory.endswith("/ingestion") else "./"

# Generated files that receive the unicode-regex replacement post-processing.
UNICODE_REGEX_REPLACEMENT_FILE_PATHS = [
    f"{ingestion_path}src/metadata/generated/schema/entity/classification/tag.py",
    f"{ingestion_path}src/metadata/generated/schema/entity/events/webhook.py",
    f"{ingestion_path}src/metadata/generated/schema/entity/teams/user.py",
    f"{ingestion_path}src/metadata/generated/schema/entity/type.py",
    f"{ingestion_path}src/metadata/generated/schema/type/basic.py",
]

# CLI argument vector for datamodel-code-generator's main(); split on single
# spaces, so no argument value may itself contain a space.
args = f"--input {directory_root}openmetadata-spec/src/main/resources/json/schema --input-file-type jsonschema --output {ingestion_path}src/metadata/generated/schema --set-default-enum-member".split(" ")

main(args)

for file_path in UNICODE_REGEX_REPLACEMENT_FILE_PATHS:
Expand Down
Loading