Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: percent encode illegal chars when creating URI query #3003

Merged
merged 19 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions akka-http-core/src/main/java/akka/http/javadsl/model/Uri.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,33 @@ public abstract class Uri {

/**
* Returns a copy of this instance with a new query.
*
* Characters that are not within the range described at https://tools.ietf.org/html/rfc3986#section-3.4
* should be percent-encoded. Characters that are in that range may or may not be percent-encoded,
* and depending on how the query string is parsed this might be relevant: for example, when interpreting
* the query string as 'key=value' pairs you could use the percent-encoded '=' ('%3D') to include a '=' in the
* key or value.
*
* When characters are encountered that are outside of the RFC3986 range they are automatically
* percent-encoded, but be aware that relying on this is usually a programming error.
*/
public abstract Uri rawQueryString(String rawQuery);

/**
* Returns a copy of this instance with a new query.
*
* Characters that are not within the range described at https://tools.ietf.org/html/rfc3986#section-3.4
* should be percent-encoded. Characters that are in that range may or may not be percent-encoded,
* and depending on how the query string is parsed this might be relevant: for example, when interpreting
* the query string as 'key=value' pairs you could use the percent-encoded '=' ('%3D') to include a '=' in the
* key or value.
*
* @param strict depending on the 'strict' flag, characters outside of the range allowed by RFC3986 will
* either cause an `IllegalUriException` or be automatically percent-encoded. Be aware that relying
* on automatic percent-encoding is usually a programming error.
*/
public abstract Uri rawQueryString(String rawQuery, boolean strict);

/**
* Returns a copy of this instance with a new query.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Not for inheritance
ProblemFilters.exclude[ReversedMissingMethodProblem]("akka.http.javadsl.model.Uri.rawQueryString")

# private[http]
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.http.scaladsl.model.Uri.resolve")

# impl
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.http.impl.model.parser.CharacterClasses.raw-query-char")
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ private[http] case class JavaUri(uri: sm.Uri) extends jm.Uri {

def toRelative: jm.Uri = t(_.toRelative)

def rawQueryString(rawQuery: String): jm.Uri = t(_.withRawQueryString(rawQuery))
def rawQueryString(rawQuery: String): jm.Uri = rawQueryString(rawQuery, false)
def rawQueryString(rawQuery: String, strict: Boolean): jm.Uri =
t(_.withRawQueryString(rawQuery, if (strict) sm.Uri.ParsingMode.Strict else sm.Uri.ParsingMode.Relaxed))

def query(query: jm.Query): jm.Uri = t(_.withQuery(query.asScala))

def addPathSegment(segment: String): jm.Uri = t { u =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ private[http] object CharacterClasses {
val `strict-query-value-char` = `query-fragment-char` -- "&=;"
val `strict-query-char-np` = `strict-query-value-char` -- '+'

val `relaxed-query-char` = VCHAR -- "%#"
val `relaxed-fragment-char` = VCHAR -- '%'
val `relaxed-path-segment-char` = VCHAR -- "%/?#"
val `relaxed-query-key-char` = VCHAR -- "%&=#"
val `relaxed-query-value-char` = VCHAR -- "%&#"
val `raw-query-char` = VCHAR -- '#'
val `scheme-char` = ALPHA ++ DIGIT ++ '+' ++ '-' ++ '.'

val `userinfo-char` = unreserved ++ `sub-delims` ++ ':'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ package akka.http.impl.model.parser
import java.nio.charset.Charset

import akka.parboiled2._
import akka.http.impl.util.enhanceString_
import akka.http.scaladsl.model.Uri
import akka.http.impl.util.{ StringRendering, enhanceString_ }
import akka.http.scaladsl.model.{ Uri, UriRendering }
import akka.http.scaladsl.model.headers.HttpOrigin
import Parser.DeliveryScheme.Either
import Uri._
Expand Down Expand Up @@ -36,7 +36,7 @@ private[http] final class UriParser(

def parseAbsoluteUri(): Uri =
rule(`absolute-URI` ~ EOI).run() match {
case Right(_) => create(_scheme, _userinfo, _host, _port, collapseDotSegments(_path), _rawQueryString, _fragment)
case Right(_) => createUnsafe(_scheme, Authority(_host, _port, _userinfo), collapseDotSegments(_path), _rawQueryString, _fragment)
case Left(error) => fail(error, "absolute URI")
}

Expand All @@ -48,7 +48,7 @@ private[http] final class UriParser(

def parseAndResolveUriReference(base: Uri): Uri =
rule(`URI-reference` ~ EOI).run() match {
case Right(_) => resolve(_scheme, _userinfo, _host, _port, _path, _rawQueryString, _fragment, base)
case Right(_) => resolveUnsafe(_scheme, _userinfo, _host, _port, _path, _rawQueryString, _fragment, base)
case Left(error) => fail(error, "URI reference")
}

Expand All @@ -64,6 +64,30 @@ private[http] final class UriParser(
case Left(error) => fail(error, "URI host")
}

/**
 * Runs the `rawQueryString` rule against the complete input.
 *
 * @return a 'raw' (percent-encoded) query string that does not contain invalid characters: the text
 *         collected in `sb`, passed through `parseSafeRawQueryString`. Parse errors are handed to `fail`.
 */
def parseRawQueryString(): String =
  rule(rawQueryString ~ EOI).run() match {
    case Left(parseError) => fail(parseError, "rawQueryString")
    case Right(_)         => parseSafeRawQueryString(sb.toString)
  }

/**
 * Turns a raw query string into one without invalid characters, depending on `uriParsingMode`.
 *
 * @param rawQueryString 'raw' (percent-encoded) query string. In Relaxed mode it may still contain characters
 * not allowed by https://tools.ietf.org/html/rfc3986#section-3.4, but it is guaranteed not to have invalid
 * percent-encoded characters
 * @return a 'raw' (percent-encoded) query string that does not contain invalid characters.
 */
def parseSafeRawQueryString(rawQueryString: String): String = uriParsingMode match {
  case Uri.ParsingMode.Relaxed =>
    // Percent-encode invalid characters; the predicate handed to the encoder is
    // `query-fragment-char` extended with '%'
    val queryCharsAndPercent = `query-fragment-char` ++ '%'
    val rendering = new StringRendering
    UriRendering.encode(rendering, rawQueryString, uriParsingCharset, queryCharsAndPercent, false).get
  case Uri.ParsingMode.Strict =>
    // In strict mode the string cannot contain invalid characters, so it is returned untouched
    rawQueryString
}

def parseQuery(): Query =
rule(query ~ EOI).run() match {
case Right(query) => query
Expand All @@ -85,6 +109,10 @@ private[http] final class UriParser(
case Uri.ParsingMode.Strict => `pchar-base`
case _ => `relaxed-path-segment-char`
}
// Character class for the query string: `query-fragment-char` when parsing strictly,
// `relaxed-query-char` in every other parsing mode
private[this] val `query-char` =
  if (uriParsingMode == Uri.ParsingMode.Strict) `query-fragment-char`
  else `relaxed-query-char`
private[this] val `query-key-char` = uriParsingMode match {
case Uri.ParsingMode.Strict => `strict-query-key-char`
case Uri.ParsingMode.Relaxed => `relaxed-query-key-char`
Expand All @@ -103,6 +131,10 @@ private[http] final class UriParser(
private[this] var _host: Host = Host.Empty
private[this] var _port: Int = 0
private[this] var _path: Path = Path.Empty
/**
* Percent-encoded. When in 'relaxed' mode, characters not permitted by https://tools.ietf.org/html/rfc3986#section-3.4
* are already automatically percent-encoded here
*/
private[this] var _rawQueryString: Option[String] = None
private[this] var _fragment: Option[String] = None

Expand All @@ -111,7 +143,7 @@ private[http] final class UriParser(
private[this] def setHost(host: Host): Unit = _host = host
private[this] def setPort(port: Int): Unit = _port = port
private[this] def setPath(path: Path): Unit = _path = path
private[this] def setRawQueryString(rawQueryString: String): Unit = _rawQueryString = Some(rawQueryString)
private[this] def setRawQueryString(rawQueryString: String): Unit = _rawQueryString = Some(parseSafeRawQueryString(rawQueryString))
private[this] def setFragment(fragment: String): Unit = _fragment = Some(fragment)

// http://tools.ietf.org/html/rfc3986#appendix-A
Expand Down Expand Up @@ -194,7 +226,7 @@ private[http] final class UriParser(
def pchar = rule { `path-segment-char` ~ appendSB() | `pct-encoded` }

def rawQueryString = rule {
clearSB() ~ oneOrMore(`raw-query-char` ~ appendSB()) ~ run(setRawQueryString(sb.toString)) | run(setRawQueryString(""))
clearSB() ~ oneOrMore(`query-char` ~ appendSB() | `pct-encoded`) ~ run(setRawQueryString(sb.toString)) | run(setRawQueryString(""))
jrudolph marked this conversation as resolved.
Show resolved Hide resolved
}

// http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1
Expand Down Expand Up @@ -262,7 +294,7 @@ private[http] final class UriParser(
rule(`request-target` ~ EOI).run() match {
case Right(_) =>
val path = if (_scheme.isEmpty) _path else collapseDotSegments(_path)
create(_scheme, _userinfo, _host, _port, path, _rawQueryString, _fragment)
createUnsafe(_scheme, Authority(_host, _port, _userinfo), path, _rawQueryString, _fragment)
case Left(error) => fail(error, "request-target")
}

Expand All @@ -283,6 +315,10 @@ private[http] final class UriParser(
`absolute-path` ~ optional('?' ~ rawQueryString) // origin-form
) // TODO: asterisk-form

/**
* @return path and percent-encoded query string. When in 'relaxed' mode, characters not permitted by https://tools.ietf.org/html/rfc3986#section-3.4
* are already automatically percent-encoded here
*/
def parseHttp2PathPseudoHeader(): (Uri.Path, Option[String]) =
rule(`http2-path-pseudo-header` ~ EOI).run() match {
case Right(_) =>
Expand All @@ -309,7 +345,6 @@ private[http] final class UriParser(

private def createUriReference(): Uri = {
val path = if (_scheme.isEmpty) _path else collapseDotSegments(_path)
create(_scheme, _userinfo, _host, normalizePort(_port, _scheme), path, _rawQueryString, _fragment)
createUnsafe(_scheme, Authority(_host, normalizePort(_port, _scheme), _userinfo), path, _rawQueryString, _fragment)
}
}

Loading