From aea1d96078fa0b5c3807969465c5213651f17f1d Mon Sep 17 00:00:00 2001 From: "mana\"/scriptalert('xss')/script" Date: Thu, 20 Jan 2022 11:52:48 +0100 Subject: [PATCH] Better list articles endpoint (#339) - added new endpoints that should allow other teams to efficiently crawl our data - make use of the multiple GSI we currently offer, including `type` and `sub_type` filters - added some documentation for the core service see BUZZ-1035 --- stroeer/core/v1/core_article_service.proto | 311 ++++++++++++++++-- stroeer/fragment/v1/stage_service.proto | 2 +- .../article/v1/article_page_service.proto | 2 +- .../section/v1/section_page_service.proto | 2 +- 4 files changed, 287 insertions(+), 30 deletions(-) diff --git a/stroeer/core/v1/core_article_service.proto b/stroeer/core/v1/core_article_service.proto index 1c7a39fc..a4a5933b 100644 --- a/stroeer/core/v1/core_article_service.proto +++ b/stroeer/core/v1/core_article_service.proto @@ -1,55 +1,312 @@ syntax = "proto3"; /** - * @FileArticle ℠ArticleService + * @FileArticle ⚙︎ CoreArticleService */ package stroeer.core.v1; import "stroeer/core/v1/article.proto"; +import "google/protobuf/timestamp.proto"; +import "google/protobuf/empty.proto"; option go_package = "github.com/stroeer/go-tapir/core/v1;core"; option java_multiple_files = true; option java_package = "de.stroeer.core.v1"; -// Service to manage core articles +/** +* ```protobuf +* service ArticleService { +* // get a single article by its `article_id` +* rpc GetArticle (GetArticleRequest) returns (stroeer.core.v1.Article) {} +* // query multiple articles at once +* rpc ListArticles (ListArticlesRequest) returns (ListArticlesResponse) {} +* // list the available root sections +* rpc ListSections (google.protobuf.Empty) returns (ListSectionsResponse) {} +* } +* ``` +* Core service to either query a single article (`rpc GetArticle()`) identified +* by its id or to query multiple articles (`rpc ListArticles()`) by providing +* a query. 
+* +* All results returned from this service are _unfiltered_, hence they may contain +* [`elements`](element.html) that are *expired*, *not yet valid* or whose [`state`](metadata.html#state) +* is not `PUBLISHED`. +*/ + service ArticleService { - // Returns an article by it's `id` rpc GetArticle (GetArticleRequest) returns (stroeer.core.v1.Article) {} - - // Returns pages of articles by their `home_section` rpc ListArticles (ListArticlesRequest) returns (ListArticlesResponse) {} + rpc ListSections (google.protobuf.Empty) returns (ListSectionsResponse) {} } - -// Request message to get an article. +/** +* # `⚙︎ GetArticle` +* ```protobuf +* rpc GetArticle (GetArticleRequest) returns (stroeer.core.v1.Article) {} +* ``` +* +* returns a single [`stroeer.core.v1.Article`](Article.html) if the given `id` exists, +* an `Error`, otherwise. (todo: describe errors) +* +* ## `GetArticleRequest` +* ```protobuf +* message GetArticleRequest { +* int64 id = 1; +* } +* ``` +* | Field name | Type | Description | +* |------------------|---------------------|-------------------------------------------------------------| +* | `id` | `int64` | [required] Unique ID of the article defined by the content management system | +*/ message GetArticleRequest { - // ID of the article defined by the content management system (required). int64 id = 1; } +/** +* # `⚙︎ ListArticles` +* ```protobuf +* rpc ListArticles (ListArticlesRequest) returns (ListArticlesResponse) {} +* ``` +* +* returns a [`ListArticlesResponse`](#listarticlesresponse) with +* articles matching the query. If the results exceed 100 Articles or 1 MB +* the response can be paginated to obtain additional results. +* +*/ message ListArticlesRequest { - // mandatory: valid section_path, with leading and trailing slash - // e.g. `/sport/` - string section_path = 1; - - // The maximum number of articles to return. The service may return fewer than - // this value. - // If unspecified, at most 10 articles will be returned. 
- // The maximum value is 100; values above 100 will be coerced to 100. - int32 page_size = 2; - - // A page token, received from a previous `ListArticles` call. - // Provide this to retrieve the subsequent page. - // - // When paginating, all other parameters provided to `ListArticles` must match - // the call that provided the page token. - string page_token = 3; + /** + * ## `ListArticlesRequest` + * ```protobuf + * message ListArticlesRequest { + * Query query = 1; + * Filters filters = 2; + * int32 page_size = 3; + * string page_token = 4; + * } + * ``` + * + * | Field name | Type | Description | + * |------------------|---------------------|-------------------------------------------------------------| + * | `query` | [`Query`][1] | [required] find items based on query values | + * | `filters` | [`Filters`][2] | [optional] A filter expression is applied after a Query finishes, but before the results are returned. | + * | `page_size` | `int32` | [optional] limit the results per page, default is `10`; max is `100` (or result exceeds `1 MB`). Values above 100 will be coerced to 100. If results get truncated, you can use pagination. | + * | `page_token` | `string` | [optional] A page token, received from a previous `ListArticles` call. Provide this to retrieve the subsequent page. When paginating, all other parameters provided to `ListArticles` must match the call that provided the page token.| + * + * [1]: #query + * [2]: #filters + */ + + Query query = 1; + Filters filters = 2; + int32 page_size = 3; + string page_token = 4; + + message Query { + /** + * ## `Query` + * ```protobuf + * message Query { + * string path = 1; + * Type type = 2; + * SortBy sort_by = 3; + * Order order = 4; + * google.protobuf.Timestamp from_time = 5; + * google.protobuf.Timestamp to_time = 6; + * } + * ``` + * Specify the search criteria. The list-API is built around sections which + * come in two flavors: + * + * 1. 
`home_section`: find all articles that reside within that exact section. + * The `home_section` is equal to the settings found in the CMS, e.g. `/nachrichten/wissen/` + * 2. `root_section`: this property is derived from the `home_section` path + * by retaining only the _root_ folder, e.g. for `/nachrichten/wissen/` the `root_section` + * becomes `/nachrichten/` + * + * In most cases using the `root_section` should yield better results since + * it will also find content in nested sections whereas `home_section` would + * only return content which was curated into the exact section that was queried. + * + * | Field name | Type | Description | + * |------------------|---------------------|-------------------------------------------------------------| + * | `path` | `string` | [required] `path`, with leading and trailing slash (e.g. `/nachrichten/`) | + * | `type` | [`Type`][3] | [required] query type, either `Type.HOME_SECTION` or `Type.ROOT_SECTION` | + * | `sort_by` | [`SortBy`][4] | [required] sorting of the result set, either `SortBy.UPDATE_TIME` or `SortBy.PUBLISH_TIME` | + * | `order` | [`Order`][5] | [optional] sorting direction for the results regarding the `sort_by` field, default is `Order.ASCENDING` | + * | `from_time` | [`Timestamp`][ts] | [optional] time constraint that refers to the `sort_by` field. | + * | `to_time` | [`Timestamp`][ts] | [optional] time constraint that refers to the `sort_by` field. 
| + * + * [3]: #type + * [4]: #sortby + * [5]: #order + * [ts]: https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Timestamp + */ + + string path = 1; + Type type = 2; + SortBy sort_by = 3; + Order order = 4; + google.protobuf.Timestamp from_time = 5; + google.protobuf.Timestamp to_time = 6; + + enum Type { + /** + * ## `Type` + * ```protobuf + * enum Type { + * TYPE_UNSPECIFIED = 0; + * HOME_SECTION = 1; + * ROOT_SECTION = 2; + * } + * ``` + * + * | Enum value | Description | + * |--------------------|-------------------------------------------------------------| + * | `TYPE_UNSPECIFIED` | unspecified | + * | `HOME_SECTION` | query by exact _home section_ which is configured in the CMS| + * | `ROOT_SECTION` | query by exact _root section_ which is derived from _home section_ when only retaining the first level of the `path` | + * + * see the description [above](#query) why these query types exist, also see + * [`Reference`](article.html#section_tree) how section information are stored. + */ + TYPE_UNSPECIFIED = 0; + HOME_SECTION = 1; + ROOT_SECTION = 2; + } + + enum SortBy { + /** + * ## `SortBy` + * ```protobuf + * enum SortBy { + * SORT_BY_UNSPECIFIED = 0; + * UPDATE_TIME = 1; + * PUBLISH_TIME = 2; + * } + * ``` + * + * | Enum value | Description | + * |-----------------------|-------------------------------------------------------------| + * | `SORT_BY_UNSPECIFIED` | unspecified | + * | `UPDATE_TIME` | sort by the content's [`update_time`][8] | + * | `PUBLISH_TIME` | sort by the content's [`publish_time`][9] | + * + * [8]: metadata.html#update_time + * [9]: metadata.html#publish_time + */ + SORT_BY_UNSPECIFIED = 0; + UPDATE_TIME = 1; + PUBLISH_TIME = 2; + } + + enum Order { + /** + * ## `Order` + * ```protobuf + * enum Order { + * ORDER_UNSPECIFIED = 0; + * ASCENDING = 1; + * DESCENDING = 2; + * } + * ``` + * + * order of index traversal, default: ascending. 
+ * + * | Enum value | Description | + * |---------------------|----------------------------------| + * | `ORDER_UNSPECIFIED` | unspecified | + * | `ASCENDING` | ascending order index traversal | + * | `DESCENDING` | descending order index traversal | + * + */ + ORDER_UNSPECIFIED = 0; + ASCENDING = 1; + DESCENDING = 2; + } + } + + // client side filters executed after the query was returned from the database + message Filters { + /** + * ## `Filters` + * ```protobuf + * message Filters { + * repeated Article.ContentType type_includes = 1; + * repeated Article.ContentType type_excludes = 2; + * repeated Article.ContentSubType sub_type_includes = 3; + * repeated Article.ContentSubType sub_type_excludes = 4; + * } + * ``` + * If you need to further refine the Query results, you can optionally provide + * a filter expression. A filter expression determines which items within the + * Query results should be returned to you. All of the other results are discarded. + * + * A filter expression is applied after a Query finishes, but before the results + * are returned. Therefore, a Query consumes the same amount of read capacity, + * regardless of whether a filter expression is present. 
+ * + * | Field name | Type | Description | + * |----------------------|---------------------------------|---------------------------------------------| + * | `type_includes` | [`ContentType`][6] | `type` to include into the result set | + * | `type_excludes` | [`ContentType`][6] | `type` to exclude from the result set | + * | `sub_type_includes` | [`ContentSubType`][7] | `sub_type` to include into the result set | + * | `sub_type_excludes` | [`ContentSubType`][7] | `sub_type` to exclude from the result set | + * + * [6]: article.html#type + * [7]: article.html#sub_type + */ + repeated Article.ContentType type_includes = 1; + repeated Article.ContentType type_excludes = 2; + repeated Article.ContentSubType sub_type_includes = 3; + repeated Article.ContentSubType sub_type_excludes = 4; + } } message ListArticlesResponse { - repeated stroeer.core.v1.Article articles = 1; - // A token that can be sent as `page_token` to retrieve the next page. - // If this field is omitted, there are no subsequent pages. + /** + * ## `ListArticlesResponse` + * ```protobuf + * // query multiple articles + * message ListArticlesResponse { + * repeated stroeer.core.v1.Article articles = 1; + * string next_page_token = 2; + * } + * ``` + * + * | Field name | Type | Description | + * |------------------|---------------------|-------------------------------------------------------------| + * | `articles` | [`Article`][article]| list of articles that match the `query` and also the `filters`, otherwise `empty`. | + * | `next_page_token`| `string` | A token that can be sent as `page_token` to retrieve the next page. If this field is omitted, there are no subsequent pages. 
| + * + * [article]: article.html + */ + repeated stroeer.core.v1.Article articles = 1; string next_page_token = 2; } + +/** +* # `⚙︎ ListSections` +* ```protobuf +* rpc ListSections (google.protobuf.Empty) returns (ListSectionsResponse) {} +* ``` +* +* list the available root sections +* +*/ + +message ListSectionsResponse { + /** + * ## `ListSectionsResponse` + * + * list all available `root_sections` that can be used in the [`query`](#query) above. + * + * + * ```protobuf + * message ListSectionsResponse { + * repeated string sections = 1; + * } + * ``` + */ + repeated string sections = 1; +} diff --git a/stroeer/fragment/v1/stage_service.proto b/stroeer/fragment/v1/stage_service.proto index 65c77d1b..c0186236 100644 --- a/stroeer/fragment/v1/stage_service.proto +++ b/stroeer/fragment/v1/stage_service.proto @@ -1,7 +1,7 @@ syntax = "proto3"; /** - * @FileArticle ℠StageService + * @FileArticle ⚙︎ StageService */ import "stroeer/page/stage/v1/stage.proto"; diff --git a/stroeer/page/article/v1/article_page_service.proto b/stroeer/page/article/v1/article_page_service.proto index 67f85842..2e440147 100644 --- a/stroeer/page/article/v1/article_page_service.proto +++ b/stroeer/page/article/v1/article_page_service.proto @@ -1,7 +1,7 @@ syntax = "proto3"; /** - * @FileArticle ℠ArticlePageService + * @FileArticle ⚙︎ ArticlePageService */ package stroeer.page.article.v1; diff --git a/stroeer/page/section/v1/section_page_service.proto b/stroeer/page/section/v1/section_page_service.proto index 77f27e95..42cee16c 100644 --- a/stroeer/page/section/v1/section_page_service.proto +++ b/stroeer/page/section/v1/section_page_service.proto @@ -1,7 +1,7 @@ syntax = "proto3"; /** - * @FileArticle ℠SectionPageService + * @FileArticle ⚙︎ SectionPageService */ package stroeer.page.section.v1;