From b02adbd6cb1de2441b0c0244c5b80410a5f29be6 Mon Sep 17 00:00:00 2001 From: Chris Riccomini Date: Mon, 18 Sep 2023 11:12:50 -0700 Subject: [PATCH] Add output format for `recap schema` and `/schema` calls The CLI and gateway both support output format options. By default, the output format is still Recap, but users mauy also specify Avro, Protobuf, or JSON schema. In the CLI, an `--output-format` or `-of` option was added. In the gateway, the `Content-Type` header may be set to: - `application/avro+json` - `application/schema+json` - `application/x-protobuf` - `application/x-recap` --- recap/cli.py | 24 +++++++++--- recap/commands.py | 52 +++++++++++++++++++++++-- recap/converters/protobuf.py | 1 + recap/gateway.py | 29 ++++++++------ tests/integration/test_cli.py | 65 +++++++++++++++++++++++++++++++ tests/integration/test_gateway.py | 50 ++++++++++++++++++++++++ tests/unit/test_cli.py | 4 +- tests/unit/test_gateway.py | 3 +- 8 files changed, 202 insertions(+), 26 deletions(-) diff --git a/recap/cli.py b/recap/cli.py index c70c123..9cb1b83 100644 --- a/recap/cli.py +++ b/recap/cli.py @@ -1,11 +1,9 @@ from typing import Annotated, Optional import typer -import uvicorn -from rich import print_json +from rich import print, print_json from recap import commands -from recap.types import to_dict app = typer.Typer() @@ -21,13 +19,25 @@ def ls(url: Annotated[Optional[str], typer.Argument(help="URL to parent.")] = No @app.command() -def schema(url: Annotated[str, typer.Argument(help="URL to schema.")]): +def schema( + url: Annotated[str, typer.Argument(help="URL to schema.")], + output_format: Annotated[ + commands.SchemaFormat, + typer.Option("--output-format", "-of", help="Schema output format."), + ] = commands.SchemaFormat.recap, +): """ Get a URL's schema. """ - if recap_struct := commands.schema(url): - print_json(data=to_dict(recap_struct)) + struct_obj = commands.schema(url, output_format) + match struct_obj: + case dict(): + print_json(data=struct_obj) + case str(): + print(struct_obj) + case _: + raise ValueError(f"Unexpected schema type: {type(struct_obj)}") @app.command() @@ -40,4 +50,6 @@ def serve( Start Recap's HTTP/JSON gateway server. """ + import uvicorn + uvicorn.run("recap.gateway:app", host=host, port=port, log_level=log_level) diff --git a/recap/commands.py b/recap/commands.py index ee3ab9c..d56e153 100644 --- a/recap/commands.py +++ b/recap/commands.py @@ -1,10 +1,22 @@ +from enum import Enum + from recap.clients import create_client, parse_url from recap.settings import RecapSettings -from recap.types import StructType settings = RecapSettings() +class SchemaFormat(str, Enum): + """ + Schema formats Recap can convert to. Used in the `schema` method. + """ + + avro = "avro" + json = "json" + protobuf = "protobuf" + recap = "recap" + + def ls(url: str | None = None) -> list[str] | None: """ List a URL's children. @@ -20,14 +32,46 @@ def ls(url: str | None = None) -> list[str] | None: return client.ls(*method_args) -def schema(url: str) -> StructType | None: +def schema(url: str, format: SchemaFormat = SchemaFormat.recap) -> dict | str: """ Get a URL's schema. :param url: URL where schema is located. - :return: Schema for URL. + :param format: Schema format to convert to. + :return: Schema in the requested format (encoded as a dict or string). """ connection_url, method_args = parse_url("schema", url) with create_client(connection_url) as client: - return client.schema(*method_args) + recap_struct = client.schema(*method_args) + output_obj: dict | str + match format: + case SchemaFormat.avro: + from recap.converters.avro import AvroConverter + + output_obj = AvroConverter().from_recap(recap_struct) + case SchemaFormat.json: + from recap.converters.json_schema import JSONSchemaConverter + + output_obj = JSONSchemaConverter().from_recap(recap_struct) + case SchemaFormat.protobuf: + from proto_schema_parser.generator import Generator + + from recap.converters.protobuf import ProtobufConverter + + proto_file = ProtobufConverter().from_recap(recap_struct) + proto_str = Generator().generate(proto_file) + + output_obj = proto_str + case SchemaFormat.recap: + from recap.types import to_dict + + struct_dict = to_dict(recap_struct) + if not isinstance(struct_dict, dict): + raise ValueError( + f"Expected a schema dict, but got {type(struct_dict)}" + ) + output_obj = struct_dict + case _: + raise ValueError(f"Unknown schema format: {format}") + return output_obj diff --git a/recap/converters/protobuf.py b/recap/converters/protobuf.py index 1349249..b2c73bd 100644 --- a/recap/converters/protobuf.py +++ b/recap/converters/protobuf.py @@ -315,6 +315,7 @@ def _from_recap_gather_types( match recap_type: case StructType(fields=fields): assert recap_type.alias is not None, "Struct must have an alias." + assert "." in recap_type.alias, "Alias must have dotted package." package, message_name = recap_type.alias.rsplit(".", 1) field_number = 1 message_elements: list[MessageElement] = [] diff --git a/recap/gateway.py b/recap/gateway.py index 55e70d0..ffbcb42 100644 --- a/recap/gateway.py +++ b/recap/gateway.py @@ -1,10 +1,16 @@ -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, Request from recap import commands -from recap.types import to_dict app = FastAPI() +FORMAT_MAP = { + "application/schema+json": commands.SchemaFormat.json, + "application/avro+json": commands.SchemaFormat.avro, + "application/x-protobuf": commands.SchemaFormat.protobuf, + "application/x-recap": commands.SchemaFormat.recap, +} + @app.get("/ls/{url:path}") async def ls(url: str | None = None) -> list[str]: @@ -19,17 +25,16 @@ async def ls(url: str | None = None) -> list[str]: @app.get("/schema/{url:path}") -async def schema(url: str) -> dict: +async def schema(url: str, request: Request): """ Get the schema of a URL. """ - if recap_struct := commands.schema(url): - recap_dict = to_dict(recap_struct) - if not isinstance(recap_dict, dict): - raise HTTPException( - status_code=503, - detail=f"Expected a schema dict, but got {type(recap_dict)}", - ) - return recap_dict - raise HTTPException(status_code=404, detail="URL not found") + content_type = request.headers.get("content-type") or "application/x-recap" + if format := FORMAT_MAP.get(content_type): + return commands.schema(url, format) + else: + raise HTTPException( + status_code=415, + detail=f"Unsupported content type: {content_type}", + ) diff --git a/tests/integration/test_cli.py b/tests/integration/test_cli.py index 70bd220..3fcfab5 100644 --- a/tests/integration/test_cli.py +++ b/tests/integration/test_cli.py @@ -86,3 +86,68 @@ def test_schema(self): "type": "struct", "fields": [{"type": "int32", "name": "test_integer", "optional": True}], } + + def test_schema_avro(self): + result = runner.invoke( + app, + [ + "schema", + "postgresql://localhost:5432/testdb/public/test_types", + "-of=avro", + ], + ) + assert result.exit_code == 0 + assert loads(result.stdout) == { + "type": "record", + "fields": [ + {"name": "test_integer", "default": None, "type": ["null", "int"]} + ], + } + + def test_schema_json(self): + result = runner.invoke( + app, + [ + "schema", + "postgresql://localhost:5432/testdb/public/test_types", + "-of=json", + ], + ) + assert result.exit_code == 0 + assert loads(result.stdout) == { + "type": "object", + "properties": {"test_integer": {"default": None, "type": "integer"}}, + } + + @pytest.mark.skip(reason="Enable when #397 is fixed") + def test_schema_protobuf(self): + result = runner.invoke( + app, + [ + "schema", + "postgresql://localhost:5432/testdb/public/test_types", + "-of=protobuf", + ], + ) + assert result.exit_code == 0 + assert ( + result.stdout + == """ +TODO: Some proto schema +""" + ) + + def test_schema_recap(self): + result = runner.invoke( + app, + [ + "schema", + "postgresql://localhost:5432/testdb/public/test_types", + "-of=recap", + ], + ) + assert result.exit_code == 0 + assert loads(result.stdout) == { + "type": "struct", + "fields": [{"type": "int32", "name": "test_integer", "optional": True}], + } diff --git a/tests/integration/test_gateway.py b/tests/integration/test_gateway.py index d309c90..be90ba5 100644 --- a/tests/integration/test_gateway.py +++ b/tests/integration/test_gateway.py @@ -3,6 +3,7 @@ import httpx import psycopg2 +import pytest from uvicorn import Server from uvicorn.config import Config @@ -88,3 +89,52 @@ def test_schema(self): "type": "struct", "fields": [{"name": "test_integer", "type": "int32", "optional": True}], } + + def test_schema_avro(self): + response = client.get( + "/schema/postgresql://localhost:5432/testdb/public/test_types", + headers={"Content-Type": "application/avro+json"}, + ) + assert response.status_code == 200 + assert response.json() == { + "type": "record", + "fields": [ + {"name": "test_integer", "default": None, "type": ["null", "int"]} + ], + } + + def test_schema_json(self): + response = client.get( + "/schema/postgresql://localhost:5432/testdb/public/test_types", + headers={"Content-Type": "application/schema+json"}, + ) + assert response.status_code == 200 + assert response.json() == { + "type": "object", + "properties": {"test_integer": {"default": None, "type": "integer"}}, + } + + @pytest.mark.xfail(reason="Enable when #397 is fixed") + def test_schema_protobuf(self): + response = client.get( + "/schema/postgresql://localhost:5432/testdb/public/test_types", + headers={"Content-Type": "application/x-protobuf"}, + ) + assert response.status_code == 200 + assert ( + response.text + == """ +TODO: Some proto schema +""" + ) + + def test_schema_recap(self): + response = client.get( + "/schema/postgresql://localhost:5432/testdb/public/test_types", + headers={"Content-Type": "application/x-recap"}, + ) + assert response.status_code == 200 + assert response.json() == { + "type": "struct", + "fields": [{"name": "test_integer", "type": "int32", "optional": True}], + } diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index a990f37..82dcaca 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -6,7 +6,7 @@ from typer.testing import CliRunner from recap.cli import app -from recap.types import IntType, StructType +from recap.types import IntType, StructType, to_dict runner = CliRunner() @@ -39,7 +39,7 @@ def test_ls_subpath(self, mock_ls): @patch("recap.commands.schema") def test_schema(self, mock_schema): - mock_schema.return_value = StructType([IntType(bits=32)]) + mock_schema.return_value = to_dict(StructType([IntType(bits=32)])) result = runner.invoke(app, ["schema", "foo"]) assert result.exit_code == 0 assert loads(result.stdout) == {"type": "struct", "fields": ["int32"]} diff --git a/tests/unit/test_gateway.py b/tests/unit/test_gateway.py index 5709089..ab8f2c7 100644 --- a/tests/unit/test_gateway.py +++ b/tests/unit/test_gateway.py @@ -3,7 +3,6 @@ from fastapi.testclient import TestClient from recap.gateway import app -from recap.types import IntType, StructType client = TestClient(app) @@ -26,7 +25,7 @@ def test_ls_subpath(mock_ls): @patch("recap.commands.schema") def test_schema(mock_schema): - mock_schema.return_value = StructType([IntType(bits=32)]) + mock_schema.return_value = {"type": "struct", "fields": ["int32"]} response = client.get("/schema/foo") expected = {"type": "struct", "fields": ["int32"]} assert response.status_code == 200