Skip to content

fields & fieldlists interfaces and implementation #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 175 additions & 14 deletions python-packages/smithy-python/smithy_python/_private/http/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@
# TODO: move all of this out of _private


from collections import Counter, OrderedDict
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Any, Protocol
from urllib.parse import urlparse, urlunparse

from smithy_python.interfaces import http as http_interface
from ... import interfaces
from ...interfaces.http import FieldPosition as FieldPosition # re-export


class URI:
Expand Down Expand Up @@ -91,16 +94,15 @@ def __eq__(self, other: object) -> bool:
class Request:
def __init__(
self,
url: http_interface.URI,
url: interfaces.http.URI,
method: str = "GET",
headers: http_interface.HeadersList | None = None,
headers: interfaces.http.HeadersList | None = None,
body: Any = None,
):
self.url: http_interface.URI = url
self.url: interfaces.http.URI = url
self.method: str = method
self.body: Any = body

self.headers: http_interface.HeadersList = []
self.headers: interfaces.http.HeadersList = []
if headers is not None:
self.headers = headers

Expand All @@ -109,18 +111,177 @@ class Response:
def __init__(
self,
status_code: int,
headers: http_interface.HeadersList,
headers: interfaces.http.HeadersList,
body: Any,
):
self.status_code: int = status_code
self.headers: http_interface.HeadersList = headers
self.headers: interfaces.http.HeadersList = headers
self.body: Any = body


class Field(interfaces.http.Field):
"""
A name-value pair representing a single field in an HTTP Request or Response.

The kind will dictate metadata placement within an HTTP message.

All field names are case insensitive and case-variance must be treated as
equivalent. Names may be normalized but should be preserved for accuracy during
transmission.
"""

def __init__(
self,
*,
name: str,
values: Iterable[str] | None = None,
kind: FieldPosition = FieldPosition.HEADER,
):
self.name = name
self.values: list[str] = [val for val in values] if values is not None else []
self.kind = kind

def add(self, value: str) -> None:
"""Append a value to a field."""
self.values.append(value)

def set(self, values: list[str]) -> None:
"""Overwrite existing field values."""
self.values = values

def remove(self, value: str) -> None:
"""Remove all matching entries from list."""
try:
while True:
self.values.remove(value)
except ValueError:
return

def as_string(self) -> str:
"""
Get comma-delimited string of all values.

If the ``Field`` has zero values, the empty string is returned. If the ``Field``
has exactly one value, the value is returned unmodified.

For ``Field``s with more than one value, the values are joined by a comma and a
space. For such multi-valued ``Field``s, any values that already contain
commas or double quotes will be surrounded by double quotes. Within any values
that get quoted, pre-existing double quotes and backslashes are escaped with a
backslash.
"""
value_count = len(self.values)
if value_count == 0:
return ""
if value_count == 1:
return self.values[0]
return ", ".join(quote_and_escape_field_value(val) for val in self.values)

def as_tuples(self) -> list[tuple[str, str]]:
"""
Get list of ``name``, ``value`` tuples where each tuple represents one value.
"""
return [(self.name, val) for val in self.values]

def __eq__(self, other: object) -> bool:
"""Name, values, and kind must match. Values order must match."""
if not isinstance(other, Field):
return False
return (
self.name == other.name
and self.kind is other.kind
and self.values == other.values
)

def __repr__(self) -> str:
return f"Field(name={self.name!r}, value={self.values!r}, kind={self.kind!r})"


def quote_and_escape_field_value(value: str) -> str:
"""Escapes and quotes a single :class:`Field` value if necessary.

See :func:`Field.as_string` for quoting and escaping logic.
"""
chars_to_quote = (",", '"')
if any(char in chars_to_quote for char in value):
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
return f'"{escaped}"'
else:
return value


class Fields(interfaces.http.Fields):
def __init__(
self,
initial: Iterable[interfaces.http.Field] | None = None,
*,
encoding: str = "utf-8",
):
"""
Collection of header and trailer entries mapped by name.

:param initial: Initial list of ``Field`` objects. ``Field``s can alse be added
with :func:`set_field` and later removed with :func:`remove_field`.
:param encoding: The string encoding to be used when converting the ``Field``
name and value from ``str`` to ``bytes`` for transmission.
"""
init_fields = [fld for fld in initial] if initial is not None else []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there a reason we're creating a new list here instead of just using initial? Is the concern we're going to get a generator or some other unindexable value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I kept this list comprehension here is that I wanted to allow any Iterable in the constructor signature. I could limit the type of initial to just list | None, or I could use list() instead of the list comprehension.

init_field_names = [self._normalize_field_name(fld.name) for fld in init_fields]
fname_counter = Counter(init_field_names)
repeated_names_exist = (
len(init_fields) > 0 and fname_counter.most_common(1)[0][1] > 1
)
if repeated_names_exist:
non_unique_names = [name for name, num in fname_counter.items() if num > 1]
raise ValueError(
"Field names of the initial list of fields must be unique. The "
"following normalized field names appear more than once: "
f"{', '.join(non_unique_names)}."
)
init_tuples = zip(init_field_names, init_fields)
self.entries: OrderedDict[str, interfaces.http.Field] = OrderedDict(init_tuples)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we'd arrived at using an OrderedDict in a previous discussion. The reason we'd arrived at this was so we perform this by default for header ordering?

    def __eq__(self, other):
        return dict.__eq__(self, other) and all(map(_eq, self, other))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hah, took me a minute to understand. Yes, after the introduction of __iter__ this snippet would indeed work.

And yes, I decided to stick with OrderDict because it gives me the ordering check for free as part of the equality check:

>>> tuples1 = [('a', 1), ('b', 2)]
>>> tuples2 = [('b', 2), ('a', 1)]
>>> dict1, dict2 = dict(tuples1), dict(tuples2)
>>> od1, od2 = OrderedDict(tuples1), OrderedDict(tuples2)
>>> dict1 == dict2
True
>>> od1 == od2
False

self.encoding: str = encoding

def set_field(self, field: interfaces.http.Field) -> None:
"""Set entry for a Field name."""
normalized_name = self._normalize_field_name(field.name)
self.entries[normalized_name] = field

def get_field(self, name: str) -> interfaces.http.Field:
"""Retrieve Field entry."""
normalized_name = self._normalize_field_name(name)
return self.entries[normalized_name]

def remove_field(self, name: str) -> None:
"""Delete entry from collection."""
normalized_name = self._normalize_field_name(name)
del self.entries[normalized_name]

def get_by_type(self, kind: FieldPosition) -> list[interfaces.http.Field]:
"""Helper function for retrieving specific types of fields.

Used to grab all headers or all trailers.
"""
return [entry for entry in self.entries.values() if entry.kind is kind]
Comment on lines +260 to +265
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this works fine for now. I am curious though if we'd ever want to track this on insertion/removal to avoid iterating every header each time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean make entries a dict of dicts that gets accessed like self.entries[FieldPosition.HEADER]["x-my-header-name"]? Of course doable, but would require double bookkeeping because the API also expects the complete list of all fields to be maintained in order.


def _normalize_field_name(self, name: str) -> str:
"""Normalize field names. For use as key in ``entries``."""
return name.lower()

def __eq__(self, other: object) -> bool:
"""Encoding must match. Entries must match in values and order."""
if not isinstance(other, Fields):
return False
return self.encoding == other.encoding and self.entries == other.entries

def __iter__(self) -> Iterable[interfaces.http.Field]:
yield from self.entries.values()


@dataclass
class Endpoint(http_interface.Endpoint):
url: http_interface.URI
headers: http_interface.HeadersList = field(default_factory=list)
class Endpoint(interfaces.http.Endpoint):
url: interfaces.http.URI
headers: interfaces.http.HeadersList = field(default_factory=list)


@dataclass
Expand All @@ -131,10 +292,10 @@ class StaticEndpointParams:
:params url: A static URI to route requests to.
"""

url: str | http_interface.URI
url: str | interfaces.http.URI


class StaticEndpointResolver(http_interface.EndpointResolver[StaticEndpointParams]):
class StaticEndpointResolver(interfaces.http.EndpointResolver[StaticEndpointParams]):
"""A basic endpoint resolver that forwards a static url."""

async def resolve_endpoint(self, params: StaticEndpointParams) -> Endpoint:
Expand Down Expand Up @@ -164,7 +325,7 @@ async def resolve_endpoint(self, params: StaticEndpointParams) -> Endpoint:


class _StaticEndpointConfig(Protocol):
endpoint_resolver: http_interface.EndpointResolver[StaticEndpointParams] | None
endpoint_resolver: interfaces.http.EndpointResolver[StaticEndpointParams] | None


def set_static_endpoint_resolver(config: _StaticEndpointConfig) -> None:
Expand Down
102 changes: 102 additions & 0 deletions python-packages/smithy-python/smithy_python/interfaces/http.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from collections import OrderedDict
from dataclasses import dataclass
from enum import Enum
from typing import Any, Protocol, TypeVar

# Defining headers as a list instead of a mapping to avoid ambiguity and
Expand All @@ -7,6 +21,94 @@
QueryParamsList = list[tuple[str, str]]


class FieldPosition(Enum):
"""
The type of a field. Defines its placement in a request or response.
"""

HEADER = 0
"""
Header field. In HTTP this is a header as defined in RFC 9114 Section 6.3.
Implementations of other protocols may use this FieldPosition for similar types
of metadata.
"""
Comment on lines +30 to +34
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious where this pattern of including the docstring after what it's discussing. I've only ever seen it in this repo and it strikes me as unintuitive. Am I missing a new convention? 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do this because many years ago I learned that this is the one way to make IDEs pick it up as the docstring. I always assumed but never validated that this is also true for documentation generators. It's surprisingly difficult to find documentation for this behavior, but this Stackoverflow answer suggests that I am not the only one who arrived at this conclusion.

A possible explanation for why it works this way is this: Consider the alternative of putting the enum entry docstring before the entry. The first entry's docstring would be immediately adjacent to the enum class docstring. Python would merge those into one string, and docs generators and IDEs wouldn't know which part belongs to the class and which to the entry:

class MyEnum(Enum):
    """My class docstring"""
    """... is directly next to my entry docstring!"""
    FIRST_ENTRY = 0
    """My second entry docstring"""
    SECOND_ENTRY = 1

Copy link
Contributor

@dlm6693 dlm6693 Jan 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe also for doc generation not doing it below the variable can cause issues depending on which generator you're using.

An alternative that I've come across if preferred is similar to how we format doc strings for functions:

class MyEnum(Enum):
    """My class docstring
    
    Attributes
    - - - - - - - - - - 
    FIRST_ENTRY: int
        First entry doc

    SECOND_ENTRY: int
        Second entry doc
    """


TRAILER = 1
"""
Trailer field. In HTTP this is a trailer as defined in RFC 9114 Section 6.5.
Implementations of other protocols may use this FieldPosition for similar types
of metadata.
"""


class Field(Protocol):
"""
A name-value pair representing a single field in a request or response.

The kind will dictate metadata placement within an the message, for example as
header or trailer field in a HTTP request as defined in RFC 9114 Section 4.2.

All field names are case insensitive and case-variance must be treated as
equivalent. Names may be normalized but should be preserved for accuracy during
transmission.
"""

name: str
values: list[str]
kind: FieldPosition = FieldPosition.HEADER

def add(self, value: str) -> None:
"""Append a value to a field."""
...

def set(self, values: list[str]) -> None:
"""Overwrite existing field values."""
...

def remove(self, value: str) -> None:
"""Remove all matching entries from list."""
...

def as_string(self) -> str:
"""Serialize the ``Field``'s values into a single line string."""
...

def as_tuples(self) -> list[tuple[str, str]]:
"""
Get list of ``name``, ``value`` tuples where each tuple represents one value.
"""
...


class Fields(Protocol):
"""
Protocol agnostic mapping of key-value pair request metadata, such as HTTP fields.
"""

# Entries are keyed off the name of a provided Field
entries: OrderedDict[str, Field]
encoding: str | None = "utf-8"

def set_field(self, field: Field) -> None:
"""Set entry for a Field name."""
...

def get_field(self, name: str) -> Field:
"""Retrieve Field entry."""
...

def remove_field(self, name: str) -> None:
"""Delete entry from collection."""
...

def get_by_type(self, kind: FieldPosition) -> list[Field]:
"""Helper function for retrieving specific types of fields.

Used to grab all headers or all trailers.
"""
...


class URI(Protocol):
"""Universal Resource Identifier, target location for a :py:class:`Request`."""

Expand Down
Loading