diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e9bcde3f..ee74bbf2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -125,7 +125,7 @@ jobs: CARGO: cargo CARGO_BUILD_TARGET: ${{ matrix.job.target }} PYAPP_REPO: pyapp - PYAPP_VERSION: v0.22.0 + PYAPP_VERSION: v0.23.0 PYAPP_PYTHON_VERSION: "3.12" PYAPP_PROJECT_FEATURES: oracle,postgres,mssql,mysql,server PYAPP_DISTRIBUTION_EMBED: "1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3a9c386d..a11facd1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: # Ruff replaces black, flake8, autoflake and isort - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: "v0.4.5" # make sure this is always consistent with hatch configs + rev: "v0.5.6" # make sure this is always consistent with hatch configs hooks: - id: ruff args: [--config, ./pyproject.toml] diff --git a/Makefile b/Makefile index 98fdad72..b59a7fc5 100644 --- a/Makefile +++ b/Makefile @@ -240,7 +240,7 @@ lint: ## Runs pre-commit hooks; includes ruff linting, codespell, bl .PHONY: test test: ## Run the tests @echo "=> Running test cases" - @hatch run local:cov + @hatch run +py="3.12" test:cov @echo "=> Tests complete" .PHONY: test-all diff --git a/docs/user_guide/mysql/collection_scripts.md b/docs/user_guide/mysql/collection_scripts.md index 5d20db3d..54ba52f6 100644 --- a/docs/user_guide/mysql/collection_scripts.md +++ b/docs/user_guide/mysql/collection_scripts.md @@ -1,8 +1,8 @@ +# Gather workload metadata + !!! note - For MySQL homogenous migrations, please upload the collections files to Google Migration Center - -# Gather workload metadata +!!! The workload collection supports MySQL 5.6 and newer. Older versions of MySQL are not currently supported. MariaDB is also not currently supported with this version of the script. 
@@ -40,12 +40,16 @@ unzip db-migration-assessment-collection-scripts-mysql.zip - Execute this from a system that can access your database via mysql command line client. Execute the collection script with connection parameters: + ``` ./collect-data.sh --collectionUserName root --collectionUserPass secret --hostName myhost.example.com --port 25432 --databaseService sys --vmUserName myuser --extraSSHArg "-p" --extraSSHA rg "12248" ``` + The example above will connect to a database named 'sys' on host myhost.example.com on port 25432 as user "root" with password "secret". It will also ssh as the current user to myhost.example.com, port 12248 to collect information on about the machine running the database. - - Parameters + +- Parameters + ``` Connection definition must one of: { @@ -67,7 +71,6 @@ The example above will connect to a database named 'sys' on host myhost.example. ``` - Examples: ```shell diff --git a/docs/user_guide/postgres/collection_scripts.md b/docs/user_guide/postgres/collection_scripts.md index c56b1da2..88f10e14 100644 --- a/docs/user_guide/postgres/collection_scripts.md +++ b/docs/user_guide/postgres/collection_scripts.md @@ -1,9 +1,10 @@ -!!! note - - For Postgres homogenous migrations, please upload the collections files to Google Migration Center # Gather workload metadata +!!! note + For Postgres homogenous migrations, please upload the collections files to Google Migration Center +!!! + The workload collection supports Postgres 12 and newer. Older versions of Postgres are not currently supported. ## System environment @@ -42,11 +43,15 @@ unzip db-migration-assessment-collection-scripts-postgres.zip - NOTE: The collector can be run for a single database or all databases in the instance. 
Execute the collection script with connection parameters: + ``` ./collect-data.sh --collectionUserName postgres --collectionUserPass secret --hostName myhost.example.com --port 25432 --vmUserName myuser --extraSSHArg -p --extraSSHArg 12248 ``` + The example above will connect to a database named 'postgres' (the default) on host myhost.example.com on port 25432 as user "postgres" with password "secret". It will also ssh as the current user to myhost.example.com, port 12248 to collect information on about the machine running the database. - - Parameters + +- Parameters + ``` Connection definition must one of: { @@ -71,7 +76,6 @@ The example above will connect to a database named 'postgres' (the default) on h ``` - Examples: ```shell diff --git a/pyproject.toml b/pyproject.toml index 6d2e4b2e..9b25c516 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,56 +72,12 @@ Issues = "https://github.com/GoogleCloudPlatform/database-assessment/issues" Source = "https://github.com/GoogleCloudPlatform/database-assessment" [project.optional-dependencies] -docs = [ - "mkdocs-include-markdown-plugin", - "mkdocs-gen-files", - "mkdocs-literate-nav", - "mkdocs-section-index", - "mkdocstrings[python]", - "mkdocs-glightbox>=0.3.0", - "mkdocs-material[git,imaging,recommended]", - "mkdocs-minify-plugin", - "mike>=2.0.0", - # Extensions - "pymdown-extensions>=10.5", - # Necessary for syntax highlighting in code blocks - "pygments>=2.13", - # Validation - "linkchecker>=10.3", -] -linting = [ - "mypy", - "ruff", - "pylint", - "pre-commit", - "types-click", - "types-six", - "types-decorator", - "types-pyyaml", - "types-setuptools", - "asyncpg-stubs", - "pyyaml>=6", -] mssql = ["aioodbc"] mysql = ["asyncmy>=0.2.9"] oracle = ["oracledb"] postgres = ["asyncpg>=0.29.0"] server = ["litestar[structlog,jinja]>=2.7.0", "litestar-granian>=0.2.3"] -testing = [ - "nodeenv", - # required test dependencies - "cython", - "anyio", - "coverage[toml]>=6.2", - "pytest", - "pytest-cov", - "pytest-mock", - 
"pytest-vcr", - "pytest-sugar", - "pytest-click", - "pytest-xdist", - "pytest-databases", -] + [project.scripts] dma = "dma.cli.main:app" @@ -154,10 +110,58 @@ exclude = ["/.github", "/docs"] # Default environment with production dependencies [tool.hatch.envs.default] -installer = "uv" +extra-dependencies = [ + "bump2version", + "nodeenv", + # required test dependencies + "cython", + "anyio", + "coverage[toml]>=6.2", + "pytest", + "pytest-cov", + "pytest-mock", + "pytest-vcr", + "pytest-sugar", + "pytest-click", + "pytest-xdist", + "pytest-databases", + # lint + "mypy", + "ruff", + "pylint", + "pre-commit", + "types-click", + "types-six", + "types-decorator", + "types-pyyaml", + "types-setuptools", + "asyncpg-stubs", + "pyyaml>=6", + # docs + "mkdocs-include-markdown-plugin", + "mkdocs-gen-files", + "mkdocs-literate-nav", + "mkdocs-section-index", + "mkdocstrings[python]", + "mkdocs-glightbox>=0.3.0", + "mkdocs-material[git,imaging,recommended]", + "mkdocs-minify-plugin", + "mike>=2.0.0", + # Extensions + "pymdown-extensions>=10.5", + # Necessary for syntax highlighting in code blocks + "pygments>=2.13", + # Validation + "linkchecker>=10.3", + # cli databasae brwoser + "harlequin", + "jupyterlab", + "ipython", + "seaborn", + "pandas", -[tool.hatch.envs.default.env-vars] -USE_LEGACY_DOCKER_COMPOSE = "True" +] +installer = "uv" [tool.hatch.envs.default.scripts] upgrade-all = "PIP_COMPILE_UPGRADE=1 hatch env run --env {env_name} -- python --version" @@ -165,7 +169,7 @@ upgrade-pkg = "PIP_COMPILE_UPGRADE='{args}' hatch env run --env {env_name} -- py # Test environment with test-only dependencies [tool.hatch.envs.test] -features = ["server", "oracle", "mysql", "mssql", "postgres", "testing"] +features = ["server", "oracle", "mysql", "mssql", "postgres"] template = "default" type = "virtual" @@ -181,7 +185,7 @@ no-cov = "cov --no-cov {args}" # Docs environment [tool.hatch.envs.docs] -features = ["server", "oracle", "mysql", "mssql", "postgres", "testing", "docs"] 
+features = ["server", "oracle", "mysql", "mssql", "postgres"] template = "default" type = "virtual" @@ -200,21 +204,11 @@ validate = "linkchecker --config .linkcheckerrc --ignore-url=/reference --ignore build-check = ["build", "validate"] [tool.hatch.envs.local] -extra-dependencies = [ - "bump2version", - # cli databasae brwoser - "harlequin", - "jupyterlab", - "ipython", - "seaborn", - "pandas", - -] -features = ["server", "oracle", "mysql", "mssql", "postgres", "testing", "docs", "linting"] +features = ["server", "oracle", "mysql", "mssql", "postgres"] lock-filename = "requirements/requirements-dev.txt" path = ".venv/" python = "3.12" -template = "test" +template = "default" type = "virtual" [tool.hatch.envs.local.scripts] @@ -224,8 +218,9 @@ stop-infra = "docker-compose -f tests/docker-compose.yml down --remove-orphans - # Lint environment [tool.hatch.envs.lint] -features = ["server", "oracle", "mysql", "mssql", "postgres", "testing", "docs", "linting"] +features = ["server", "oracle", "mysql", "mssql", "postgres"] python = "3.12" +template = "default" type = "virtual" [tool.hatch.envs.lint.scripts] @@ -393,6 +388,10 @@ lint.ignore = [ "PERF203", # ignore for now; investigate "COM812", "PLR0917", + "DOC201", # `return` is not documented in docstring + "DOC501", # Raised exception missing from docstring + "DOC502", # Docstring documents an exception that is not explicitly raised + "A005", # module shadows builtin ] lint.select = ["ALL"] # Allow unused variables when underscore-prefixed. diff --git a/scripts/collector/mysql/collect-data.sh b/scripts/collector/mysql/collect-data.sh index 2dfd4e0c..270c52c6 100755 --- a/scripts/collector/mysql/collect-data.sh +++ b/scripts/collector/mysql/collect-data.sh @@ -151,6 +151,10 @@ SET @PKEY='${V_FILE_TAG}'; source ${f} exit EOF +if [ ! -s ${OUTPUT_DIR}/opdb__mysql_${fname}__${V_TAG} ]; then + hdr=$(echo ${f} | cut -d '.' 
-f 1 | rev | cut -d '/' -f 1 | rev) + cat sql/headers/${hdr}.header > ${OUTPUT_DIR}/opdb__mysql_${fname}__${V_TAG} +fi done for f in $(ls -1 sql/${SCRIPT_PATH}/*.sql | grep -v -E "init.sql|_base_path_lookup.sql|hostname.sql") do @@ -163,12 +167,17 @@ source ${f} exit EOF +if [ ! -s ${OUTPUT_DIR}/opdb__mysql_${fname}__${V_TAG} ]; then + hdr=$(echo ${f} | cut -d '.' -f 1 | rev | cut -d '/' -f 1 | rev) + cat sql/headers/${hdr}.header > ${OUTPUT_DIR}/opdb__mysql_${fname}__${V_TAG} +fi +done + serverHostname=$(${SQLCMD} --user=$user --password=$pass -h $host -P $port --force --silent --skip-column-names $db 2>>${OUTPUT_DIR}/opdb__stderr_${V_FILE_TAG}.log < sql/hostname.sql | tr -d '\r') serverIPs=$(getent hosts "$serverHostname" | awk '{print $1}' | tr '\n' ',') hostOut="output/opdb__mysql_db_host_${V_FILE_TAG}.csv" echo "HOSTNAME|IP_ADDRESSES" > "$hostOut" echo "\"$serverHostname\"|\"$serverIPs\"" >> "$hostOut" -done specsOut="output/opdb__mysql_db_machine_specs_${V_FILE_TAG}.csv" host=$(echo ${connectString} | cut -d '/' -f 4 | cut -d ':' -f 1) @@ -214,7 +223,7 @@ do cp sed_${V_FILE_TAG}.tmp ${outfile} rm sed_${V_FILE_TAG}.tmp else - ${SED} -r 's/[[:space:]]+\|/\|/g;s/\|[[:space:]]+/\|/g;/^$/d;/^\+/d;s/^\|//g;s/\|$//g;/^(.* row(s)?)/d;1 s/[a-z]/\U&/g' ${outfile} > sed_${V_FILE_TAG}.tmp + ${SED} -r 's/[[:space:]]+\|/\|/g;s/\|[[:space:]]+/\|/g;/^$/d;/^\+/d;s/^\|//g;s/\|$//g;/^(.* row(s)?)/d;' ${outfile} > sed_${V_FILE_TAG}.tmp cp sed_${V_FILE_TAG}.tmp ${outfile} rm sed_${V_FILE_TAG}.tmp fi diff --git a/scripts/collector/mysql/sql/headers/config.header b/scripts/collector/mysql/sql/headers/config.header new file mode 100644 index 00000000..0c03f107 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/config.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|variable_category|variable_name|variable_value diff --git a/scripts/collector/mysql/sql/headers/data_types.header b/scripts/collector/mysql/sql/headers/data_types.header new file mode 100644 index 
00000000..2095e263 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/data_types.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|table_catalog|table_schema|table_name|data_type|data_type_count diff --git a/scripts/collector/mysql/sql/headers/database_details.header b/scripts/collector/mysql/sql/headers/database_details.header new file mode 100644 index 00000000..68f165a7 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/database_details.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|table_schema|total_table_count|innodb_table_count|non_innodb_table_count|total_row_count|innodb_table_row_count|non_innodb_table_row_count|total_data_size_bytes|innodb_data_size_bytes|non_innodb_data_size_bytes|total_index_size_bytes|innodb_index_size_bytes|non_innodb_index_size_bytes|total_size_bytes|innodb_total_size_bytes|non_innodb_total_size_bytes|total_index_count|innodb_index_count|non_innodb_index_count diff --git a/scripts/collector/mysql/sql/headers/db_host.header b/scripts/collector/mysql/sql/headers/db_host.header new file mode 100644 index 00000000..1af501e3 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/db_host.header @@ -0,0 +1 @@ +HOSTNAME|IP_ADDRESSES diff --git a/scripts/collector/mysql/sql/headers/db_machine.header b/scripts/collector/mysql/sql/headers/db_machine.header new file mode 100644 index 00000000..23dd5fbc --- /dev/null +++ b/scripts/collector/mysql/sql/headers/db_machine.header @@ -0,0 +1 @@ +PKEY|DMA_SOURCE_ID|DMA_MANUAL_ID|MACHINE_NAME|PHYSICAL_CPU_COUNT|LOGICAL_CPU_COUNT|TOTAL_OS_MEMORY_MB|TOTAL_SIZE_BYTES|USED_SIZE_BYTES|PRIMARY_MAC|IP_ADDRESSES diff --git a/scripts/collector/mysql/sql/headers/engines.header b/scripts/collector/mysql/sql/headers/engines.header new file mode 100644 index 00000000..679c15c3 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/engines.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|engine_name|engine_support|engine_transactions|engine_xa|engine_savepoints|engine_comment diff 
--git a/scripts/collector/mysql/sql/headers/hostname.header b/scripts/collector/mysql/sql/headers/hostname.header new file mode 100644 index 00000000..c8ad592d --- /dev/null +++ b/scripts/collector/mysql/sql/headers/hostname.header @@ -0,0 +1 @@ +server_hostname diff --git a/scripts/collector/mysql/sql/headers/plugins.header b/scripts/collector/mysql/sql/headers/plugins.header new file mode 100644 index 00000000..fa3e7513 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/plugins.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|plugin_name|plugin_version|plugin_status|plugin_type|plugin_type_version|plugin_library|plugin_library_version|plugin_author|plugin_description|plugin_license|load_option diff --git a/scripts/collector/mysql/sql/headers/process_list.header b/scripts/collector/mysql/sql/headers/process_list.header new file mode 100644 index 00000000..f688fc29 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/process_list.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|process_id|process_host|process_db|process_command|process_time|process_state diff --git a/scripts/collector/mysql/sql/headers/resource_groups.header b/scripts/collector/mysql/sql/headers/resource_groups.header new file mode 100644 index 00000000..299e347b --- /dev/null +++ b/scripts/collector/mysql/sql/headers/resource_groups.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|resource_group_name|resource_group_type|resource_group_enabled|vcpu_ids|thread_priority diff --git a/scripts/collector/mysql/sql/headers/schema_objects.header b/scripts/collector/mysql/sql/headers/schema_objects.header new file mode 100644 index 00000000..1774209f --- /dev/null +++ b/scripts/collector/mysql/sql/headers/schema_objects.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|object_catalog|object_schema|object_category|object_type|object_owner_schema|object_owner|object_name diff --git a/scripts/collector/mysql/sql/headers/table_details.header 
b/scripts/collector/mysql/sql/headers/table_details.header new file mode 100644 index 00000000..f390a29a --- /dev/null +++ b/scripts/collector/mysql/sql/headers/table_details.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|table_schema|table_name|table_engine|table_rows|data_length|index_length|is_compressed|is_partitioned|partition_count|index_count|fulltext_index_count|is_encrypted|spatial_index_count|has_primary_key|row_format|table_type diff --git a/scripts/collector/mysql/sql/headers/users.header b/scripts/collector/mysql/sql/headers/users.header new file mode 100644 index 00000000..f0e36343 --- /dev/null +++ b/scripts/collector/mysql/sql/headers/users.header @@ -0,0 +1 @@ +pkey|dma_source_id|dma_manual_id|user_host|user_count diff --git a/scripts/collector/postgres/sql/base/database_details.sql b/scripts/collector/postgres/sql/base/database_details.sql index 125c2855..dacea207 100644 --- a/scripts/collector/postgres/sql/base/database_details.sql +++ b/scripts/collector/postgres/sql/base/database_details.sql @@ -22,7 +22,7 @@ with db as ( pg_encoding_to_char(db.encoding) as character_set_encoding, pg_database_size(db.datname) as total_disk_size_bytes from pg_database db - where datname = current_database() + where datname = current_database() ), db_size as ( select s.datid as database_oid, diff --git a/src/dma/lib/db/query_manager.py b/src/dma/lib/db/query_manager.py index fb1fc1d5..2ff81b7d 100644 --- a/src/dma/lib/db/query_manager.py +++ b/src/dma/lib/db/query_manager.py @@ -56,11 +56,7 @@ async def from_connection( queries: Queries, connection: Any, ) -> AsyncIterator[QueryManagerT]: - """Context manager that returns instance of query manager object. - - Returns: - The service object instance. 
- """ + """Context manager that returns instance of query manager object.""" yield cls(connection=connection, queries=queries) async def select(self, method: str, **binds: Any) -> list[dict[str, Any]]: diff --git a/src/dma/utils.py b/src/dma/utils.py index 0d1a898c..2b5d0ef9 100644 --- a/src/dma/utils.py +++ b/src/dma/utils.py @@ -38,8 +38,8 @@ class _ContextManagerWrapper: def __init__(self, cm: AbstractContextManager[T]) -> None: self._cm = cm - async def __aenter__(self) -> T: - return self._cm.__enter__() + async def __aenter__(self) -> T: # pyright: ignore[reportInvalidTypeVarUse] + return self._cm.__enter__() # type: ignore async def __aexit__( self, @@ -73,7 +73,7 @@ def wrap_sync(fn: Callable[P, T]) -> Callable[P, Awaitable[T]]: return fn async def wrapped(*args: P.args, **kwargs: P.kwargs) -> T: - return await anyio.to_thread.run_sync(partial(fn, *args, **kwargs)) + return await anyio.to_thread.run_sync(partial(fn, *args, **kwargs)) # pyright: ignore[reportAttributeAccessIssue] return wrapped diff --git a/tests/helpers.py b/tests/helpers.py index cf837d91..f5bbf7dd 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -33,8 +33,8 @@ class _ContextManagerWrapper: def __init__(self, cm: AbstractContextManager[T]) -> None: self._cm = cm - async def __aenter__(self) -> T: - return self._cm.__enter__() + async def __aenter__(self) -> T: # pyright: ignore[reportInvalidTypeVarUse] + return self._cm.__enter__() # type: ignore async def __aexit__( self, @@ -68,6 +68,6 @@ def wrap_sync(fn: Callable[P, T]) -> Callable[P, Awaitable[T]]: return fn async def wrapped(*args: P.args, **kwargs: P.kwargs) -> T: - return await anyio.to_thread.run_sync(partial(fn, *args, **kwargs)) + return await anyio.to_thread.run_sync(partial(fn, *args, **kwargs)) # type: ignore return wrapped