Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Media-type parser #61987

Merged
merged 32 commits into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5e42390
Split responsibility for format parsing
pgomulka Sep 2, 2020
01f001d
parse * and ndjson
pgomulka Sep 2, 2020
50a88a0
make format not accepting applicaiton/
pgomulka Sep 2, 2020
fb03ffd
post data request should parse applicaiton/json style
pgomulka Sep 2, 2020
09f281e
unused import
pgomulka Sep 2, 2020
8bc024f
fix sql parsing
pgomulka Sep 2, 2020
070508c
split format and accept header
pgomulka Sep 3, 2020
3c7ab16
fix and todos
pgomulka Sep 3, 2020
59a7f42
Merge branch 'master' into xcontent_format_parsing
pgomulka Sep 3, 2020
968b1c9
media type parser
pgomulka Sep 4, 2020
46f8f33
media type parser
pgomulka Sep 4, 2020
cbbe093
precommit
pgomulka Sep 4, 2020
222caee
rename and null check
pgomulka Sep 7, 2020
6bdec13
Merge branch 'master' into header_version_split
pgomulka Sep 7, 2020
ee97564
Merge branch 'master' into header_version_split
pgomulka Sep 7, 2020
7f52e11
fix text format parsing
pgomulka Sep 7, 2020
63db70c
Merge branch 'master' into header_version_split
pgomulka Sep 8, 2020
57ddb40
Apply suggestions from code review
pgomulka Sep 9, 2020
b5f1eff
code review follow up
pgomulka Sep 9, 2020
90e798d
Merge branch 'header_version_split' of github.com:pgomulka/elasticsea…
pgomulka Sep 9, 2020
fa49be4
javadoc and validation
pgomulka Sep 14, 2020
31d92ac
Update x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/pl…
pgomulka Sep 14, 2020
a925fbe
Merge branch 'header_version_split' of github.com:pgomulka/elasticsea…
pgomulka Sep 14, 2020
23c4e41
javadoc fix
pgomulka Sep 14, 2020
b1e3fb1
remove shortName
pgomulka Sep 14, 2020
c17a895
javadoc fix
pgomulka Sep 14, 2020
7d6bd08
fix compile error
pgomulka Sep 14, 2020
3c93954
fix test compile
pgomulka Sep 14, 2020
77068a8
Merge branch 'master' into header_version_split
pgomulka Sep 15, 2020
8fb0cd4
Merge branch 'master' into header_version_split
pgomulka Sep 15, 2020
63fb6c7
remove todo and a testcase
pgomulka Sep 16, 2020
4598a0a
Apply suggestions from code review
pgomulka Sep 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ static String convertResponseToJson(Response response) throws IOException {
if (entity.getContentType() == null) {
throw new IllegalStateException("Elasticsearch didn't return the [Content-Type] header, unable to parse response body");
}
XContentType xContentType = XContentType.fromMediaTypeOrFormat(entity.getContentType().getValue());
XContentType xContentType = XContentType.fromMediaType(entity.getContentType().getValue());
if (xContentType == null) {
throw new IllegalStateException("Unsupported Content-Type: " + entity.getContentType().getValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1899,7 +1899,7 @@ protected final <Resp> Resp parseEntity(final HttpEntity entity,
if (entity.getContentType() == null) {
throw new IllegalStateException("Elasticsearch didn't return the [Content-Type] header, unable to parse response body");
}
XContentType xContentType = XContentType.fromMediaTypeOrFormat(entity.getContentType().getValue());
XContentType xContentType = XContentType.fromMediaType(entity.getContentType().getValue());
if (xContentType == null) {
throw new IllegalStateException("Unsupported Content-Type: " + entity.getContentType().getValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class PostDataRequest implements Validatable, ToXContentObject {

public static final ConstructingObjectParser<PostDataRequest, Void> PARSER =
new ConstructingObjectParser<>("post_data_request",
(a) -> new PostDataRequest((String)a[0], XContentType.fromMediaTypeOrFormat((String)a[1]), new byte[0]));
(a) -> new PostDataRequest((String)a[0], XContentType.fromMediaType((String)a[1]), new byte[0]));

static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), Job.ID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ public void testUpdate() throws IOException {

UpdateRequest parsedUpdateRequest = new UpdateRequest();

XContentType entityContentType = XContentType.fromMediaTypeOrFormat(entity.getContentType().getValue());
XContentType entityContentType = XContentType.fromMediaType(entity.getContentType().getValue());
try (XContentParser parser = createParser(entityContentType.xContent(), entity.getContent())) {
parsedUpdateRequest.fromXContent(parser);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.common.xcontent;

/**
* Abstracts a <a href="http://en.wikipedia.org/wiki/Internet_media_type">Media Type</a> and a format parameter.
* Media types are used as values on Content-Type and Accept headers
* format is an URL parameter, specifies response media type.
*/
public interface MediaType {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add javadocs to the class and methods?

/**
* Returns a type part of a MediaType
* i.e. application for application/json
*/
String type();

/**
* Returns a subtype part of a MediaType.
* i.e. json for application/json
*/
String subtype();

/**
* Returns a corresponding format for a MediaType. i.e. json for application/json media type
* Can differ from the MediaType's subtype i.e plain/text has a subtype of text but format is txt
*/
String format();
Copy link
Contributor

@jakelandis jakelandis Sep 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Format, shortName, and subType seem like 3 ways to represent the same thing, and would be great if we could get to a common term (subtype) across the board. I think we can just drop format in favor of subtype ?

Copy link
Contributor Author

@pgomulka pgomulka Sep 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in some cases these 3 are actually different. For instance - text/plain has subtype=plain, but format and shortName txt
similarly for versioned media types I plan this to be: application/vnd.elasticsearch+json;compatible-with=7 subtype is vnd.elasticsearch+json, format is json and shortName json

I think shortName is redundant and I will give it a go to remove it


/**
* returns a string representation of a media type.
*/
default String typeWithSubtype(){
return type() + "/" + subtype();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.common.xcontent;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

public class MediaTypeParser<T extends MediaType> {
private final Map<String, T> formatToMediaType;
private final Map<String, T> typeWithSubtypeToMediaType;

public MediaTypeParser(T[] acceptedMediaTypes) {
this(acceptedMediaTypes, Map.of());
}

public MediaTypeParser(T[] acceptedMediaTypes, Map<String, T> additionalMediaTypes) {
final int size = acceptedMediaTypes.length + additionalMediaTypes.size();
Map<String, T> formatMap = new HashMap<>(size);
Map<String, T> typeMap = new HashMap<>(size);
for (T mediaType : acceptedMediaTypes) {
typeMap.put(mediaType.typeWithSubtype(), mediaType);
formatMap.put(mediaType.format(), mediaType);
}
for (Map.Entry<String, T> entry : additionalMediaTypes.entrySet()) {
String typeWithSubtype = entry.getKey();
T mediaType = entry.getValue();

typeMap.put(typeWithSubtype.toLowerCase(Locale.ROOT), mediaType);
formatMap.put(mediaType.format(), mediaType);
}

this.formatToMediaType = Map.copyOf(formatMap);
this.typeWithSubtypeToMediaType = Map.copyOf(typeMap);
}

public T fromMediaType(String mediaType) {
ParsedMediaType parsedMediaType = parseMediaType(mediaType);
return parsedMediaType != null ? parsedMediaType.getMediaType() : null;
}

public T fromFormat(String format) {
if (format == null) {
return null;
}
return formatToMediaType.get(format.toLowerCase(Locale.ROOT));
}

/**
* parsing media type that follows https://tools.ietf.org/html/rfc7231#section-3.1.1.1
* @param headerValue a header value from Accept or Content-Type
* @return a parsed media-type
*/
public ParsedMediaType parseMediaType(String headerValue) {
if (headerValue != null) {
String[] split = headerValue.toLowerCase(Locale.ROOT).split(";");

String[] typeSubtype = split[0].trim().toLowerCase(Locale.ROOT)
.split("/");
if (typeSubtype.length == 2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I completely follow the format variant of parsing... but I think this method does not support that... right ?

The previous version accepted either variants (format (json) or the mime type (application/json) ... so splitting these requires the caller to know know which one to call.

I see two cases TextTemplateEngine and RestSqlQueryAction where it is not immediately obvious that is correct. It seems strange that we would support this shorter name in some cases for parsing headers, but not others, and when the the format variant is used (e.g. json) is used we don't support parsing the parameters.

Is possible to just support the mime type for all content-type and accept header parsing ? (leaving the format soley to the realm of the query string parameter)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4x yes.
so this is exactly the intention here.
re1 The parseMediaType is not supporting the format parsing.
re2 The caller has to know which one to call.
re3 There should not be a place were we parse a format but a value is taken from Accept or Content-Type headers. Values from headers should follow media-type format (type/subtype;parameters)
re4 I hope after this PR this will be clear that mime types are for headers, format for query parameter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks.. i double checked TextTemplateEngine and while it appears that the former supported either application/json or json, I think we are fine (as-is here) to only support json for these reasons: 1) only applicable to inline mustache scripts in watcher 2) it is not documented and only lightly tested 3) only found one example of its usage in the wild (and it used json). Given the lack of documentation i doubt this is used much at all and if so likely uses the short form json.

I have not validated RestSqlQueryAction behavior

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RestSqlQueryAction was reviewed by @bpintea

String type = typeSubtype[0];
String subtype = typeSubtype[1];
T xContentType = typeWithSubtypeToMediaType.get(type + "/" + subtype);
if (xContentType != null) {
Map<String, String> parameters = new HashMap<>();
for (int i = 1; i < split.length; i++) {
//spaces are allowed between parameters, but not between '=' sign
String[] keyValueParam = split[i].trim().split("=");
if (keyValueParam.length != 2 || hasSpaces(keyValueParam[0]) || hasSpaces(keyValueParam[1])) {
return null;
}
parameters.put(keyValueParam[0].toLowerCase(Locale.ROOT), keyValueParam[1].toLowerCase(Locale.ROOT));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to check for keyValueParam.size before accessing [1] in the case of a malformed key= (no value)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, will add test cases

}
return new ParsedMediaType(xContentType, parameters);
}
}

}
return null;
}

private boolean hasSpaces(String s) {
return s.trim().equals(s) == false;
}

/**
* A media type object that contains all the information provided on a Content-Type or Accept header
*/
public class ParsedMediaType {
private final Map<String, String> parameters;
private final T mediaType;

public ParsedMediaType(T mediaType, Map<String, String> parameters) {
this.parameters = parameters;
this.mediaType = mediaType;
}

public T getMediaType() {
return mediaType;
}

public Map<String, String> getParameters() {
return parameters;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@
import org.elasticsearch.common.xcontent.smile.SmileXContent;
import org.elasticsearch.common.xcontent.yaml.YamlXContent;

import java.util.Locale;
import java.util.Objects;
import java.util.Map;

/**
* The content type of {@link org.elasticsearch.common.xcontent.XContent}.
*/
public enum XContentType {
public enum XContentType implements MediaType {

/**
* A JSON based content type.
Expand All @@ -47,7 +46,7 @@ public String mediaType() {
}

@Override
public String shortName() {
public String subtype() {
return "json";
}

Expand All @@ -66,7 +65,7 @@ public String mediaTypeWithoutParameters() {
}

@Override
public String shortName() {
public String subtype() {
return "smile";
}

Expand All @@ -85,7 +84,7 @@ public String mediaTypeWithoutParameters() {
}

@Override
public String shortName() {
public String subtype() {
return "yaml";
}

Expand All @@ -104,7 +103,7 @@ public String mediaTypeWithoutParameters() {
}

@Override
public String shortName() {
public String subtype() {
return "cbor";
}

Expand All @@ -114,54 +113,30 @@ public XContent xContent() {
}
};

public static final MediaTypeParser<XContentType> mediaTypeParser = new MediaTypeParser<>(XContentType.values(),
Map.of("application/*", JSON, "application/x-ndjson", JSON));


pgomulka marked this conversation as resolved.
Show resolved Hide resolved
/**
* Accepts either a format string, which is equivalent to {@link XContentType#shortName()} or a media type that optionally has
* parameters and attempts to match the value to an {@link XContentType}. The comparisons are done in lower case format and this method
* also supports a wildcard accept for {@code application/*}. This method can be used to parse the {@code Accept} HTTP header or a
* format query string parameter. This method will return {@code null} if no match is found
* Accepts a format string, which is most of the time is equivalent to {@link XContentType#subtype()}
* and attempts to match the value to an {@link XContentType}.
* The comparisons are done in lower case format.
* This method will return {@code null} if no match is found
*/
public static XContentType fromMediaTypeOrFormat(String mediaType) {
if (mediaType == null) {
return null;
}
for (XContentType type : values()) {
if (isSameMediaTypeOrFormatAs(mediaType, type)) {
return type;
}
}
final String lowercaseMediaType = mediaType.toLowerCase(Locale.ROOT);
if (lowercaseMediaType.startsWith("application/*")) {
return JSON;
}

return null;
public static XContentType fromFormat(String mediaType) {
return mediaTypeParser.fromFormat(mediaType);
}

/**
* Attempts to match the given media type with the known {@link XContentType} values. This match is done in a case-insensitive manner.
* The provided media type should not include any parameters. This method is suitable for parsing part of the {@code Content-Type}
* HTTP header. This method will return {@code null} if no match is found
* The provided media type can optionally has parameters.
* This method is suitable for parsing of the {@code Content-Type} and {@code Accept} HTTP headers.
* This method will return {@code null} if no match is found
*/
public static XContentType fromMediaType(String mediaType) {
final String lowercaseMediaType = Objects.requireNonNull(mediaType, "mediaType cannot be null").toLowerCase(Locale.ROOT);
for (XContentType type : values()) {
if (type.mediaTypeWithoutParameters().equals(lowercaseMediaType)) {
return type;
}
}
// we also support newline delimited JSON: http://specs.okfnlabs.org/ndjson/
if (lowercaseMediaType.toLowerCase(Locale.ROOT).equals("application/x-ndjson")) {
return XContentType.JSON;
}

return null;
public static XContentType fromMediaType(String mediaTypeHeaderValue) {
return mediaTypeParser.fromMediaType(mediaTypeHeaderValue);
}

private static boolean isSameMediaTypeOrFormatAs(String stringType, XContentType type) {
return type.mediaTypeWithoutParameters().equalsIgnoreCase(stringType) ||
stringType.toLowerCase(Locale.ROOT).startsWith(type.mediaTypeWithoutParameters().toLowerCase(Locale.ROOT) + ";") ||
type.shortName().equalsIgnoreCase(stringType);
}

private int index;

Expand All @@ -177,10 +152,19 @@ public String mediaType() {
return mediaTypeWithoutParameters();
}

public abstract String shortName();

public abstract XContent xContent();

public abstract String mediaTypeWithoutParameters();


@Override
public String type() {
return "application";
}

@Override
public String format() {
return subtype();
}
}
Loading