Skip to content

Commit ef44a6c

Browse files
raboofjrudolph
authored andcommitted
core: percent encode illegal chars when creating URI query (akka#3003)
This makes sure 'strict' parsing of the Uri query string only allows characters that are actually allowed here by the RFC. It does not change the fact that Uri's rawQueryString is the percent-encoded representation of the query string, since this information is needed to parse it into a key-value structure. When parsing the Uri in 'relaxed' mode or constructing programmatically, we do accept characters that are not allowed, but percent-encode any would-be invalid characters. APIs that can be used to programmatically add or modify the query string are adapted to not accept invalid percent-encoded values (e.g. %x2) and convert illegal characters. withRawQueryString is made available both in a 'strict' and a 'relaxed' variation. Fixes akka#3002 (cherry picked from commit e284816)
1 parent a3337a8 commit ef44a6c

File tree

12 files changed

+230
-56
lines changed

12 files changed

+230
-56
lines changed

akka-http-core/src/main/java/akka/http/javadsl/model/Uri.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,33 @@ public abstract class Uri {
123123

124124
/**
125125
* Returns a copy of this instance with a new query.
126+
*
127+
* Characters that are not within the range described at https://tools.ietf.org/html/rfc3986#section-3.4
128+
* should be percent-encoded. Characters that are in that range may or may not be percent-encoded,
129+
* and depending on how the query string is parsed this might be relevant: for example, when interpreting
130+
* the query string as 'key=value' pairs you could use the percent-encoded '=' ('%3D') to include a '=' in the
131+
* key or value.
132+
*
133+
* When characters are encountered that are outside of the RFC3986 range they are automatically
134+
* percent-encoded, but be aware that relying on this is usually a programming error.
126135
*/
127136
public abstract Uri rawQueryString(String rawQuery);
128137

138+
/**
139+
* Returns a copy of this instance with a new query.
140+
*
141+
* Characters that are not within the range described at https://tools.ietf.org/html/rfc3986#section-3.4
142+
* should be percent-encoded. Characters that are in that range may or may not be percent-encoded,
143+
* and depending on how the query string is parsed this might be relevant: for example, when interpreting
144+
* the query string as 'key=value' pairs you could use the percent-encoded '=' ('%3D') to include a '=' in the
145+
* key or value.
146+
*
147+
* @param strict depending on the 'strict' flag, characters outside of the range allowed by RFC3986 will
148+
* either cause an `IllegalUriException` or be automatically percent-encoded. Be aware that relying
149+
* on automatic percent-encoding is usually a programming error.
150+
*/
151+
public abstract Uri rawQueryString(String rawQuery, boolean strict);
152+
129153
/**
130154
* Returns a copy of this instance with a new query.
131155
*/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Not for inheritance
2+
ProblemFilters.exclude[ReversedMissingMethodProblem]("akka.http.javadsl.model.Uri.rawQueryString")
3+
4+
# private[http]
5+
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.http.scaladsl.model.Uri.resolve")
6+
7+
# impl
8+
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.http.impl.model.parser.CharacterClasses.raw-query-char")

akka-http-core/src/main/scala/akka/http/impl/model/JavaUri.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@ private[http] case class JavaUri(uri: sm.Uri) extends jm.Uri {
6868

6969
def toRelative: jm.Uri = t(_.toRelative)
7070

71-
def rawQueryString(rawQuery: String): jm.Uri = t(_.withRawQueryString(rawQuery))
71+
def rawQueryString(rawQuery: String): jm.Uri = rawQueryString(rawQuery, false)
72+
def rawQueryString(rawQuery: String, strict: Boolean): jm.Uri =
73+
t(_.withRawQueryString(rawQuery, if (strict) sm.Uri.ParsingMode.Strict else sm.Uri.ParsingMode.Relaxed))
74+
7275
def query(query: jm.Query): jm.Uri = t(_.withQuery(query.asScala))
7376

7477
def addPathSegment(segment: String): jm.Uri = t { u =>

akka-http-core/src/main/scala/akka/http/impl/model/parser/CharacterClasses.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ private[http] object CharacterClasses {
4747
val `strict-query-value-char` = `query-fragment-char` -- "&=;"
4848
val `strict-query-char-np` = `strict-query-value-char` -- '+'
4949

50+
val `relaxed-query-char` = VCHAR -- "%#"
5051
val `relaxed-fragment-char` = VCHAR -- '%'
5152
val `relaxed-path-segment-char` = VCHAR -- "%/?#"
5253
val `relaxed-query-key-char` = VCHAR -- "%&=#"
5354
val `relaxed-query-value-char` = VCHAR -- "%&#"
54-
val `raw-query-char` = VCHAR -- '#'
5555
val `scheme-char` = ALPHA ++ DIGIT ++ '+' ++ '-' ++ '.'
5656

5757
val `userinfo-char` = unreserved ++ `sub-delims` ++ ':'

akka-http-core/src/main/scala/akka/http/impl/model/parser/UriParser.scala

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ package akka.http.impl.model.parser
77
import java.nio.charset.Charset
88

99
import akka.parboiled2._
10-
import akka.http.impl.util.enhanceString_
11-
import akka.http.scaladsl.model.Uri
10+
import akka.http.impl.util.{ StringRendering, enhanceString_ }
11+
import akka.http.scaladsl.model.{ Uri, UriRendering }
1212
import akka.http.scaladsl.model.headers.HttpOrigin
1313
import Parser.DeliveryScheme.Either
1414
import Uri._
@@ -36,7 +36,7 @@ private[http] final class UriParser(
3636

3737
def parseAbsoluteUri(): Uri =
3838
rule(`absolute-URI` ~ EOI).run() match {
39-
case Right(_) => create(_scheme, _userinfo, _host, _port, collapseDotSegments(_path), _rawQueryString, _fragment)
39+
case Right(_) => createUnsafe(_scheme, Authority(_host, _port, _userinfo), collapseDotSegments(_path), _rawQueryString, _fragment)
4040
case Left(error) => fail(error, "absolute URI")
4141
}
4242

@@ -48,7 +48,7 @@ private[http] final class UriParser(
4848

4949
def parseAndResolveUriReference(base: Uri): Uri =
5050
rule(`URI-reference` ~ EOI).run() match {
51-
case Right(_) => resolve(_scheme, _userinfo, _host, _port, _path, _rawQueryString, _fragment, base)
51+
case Right(_) => resolveUnsafe(_scheme, _userinfo, _host, _port, _path, _rawQueryString, _fragment, base)
5252
case Left(error) => fail(error, "URI reference")
5353
}
5454

@@ -64,6 +64,30 @@ private[http] final class UriParser(
6464
case Left(error) => fail(error, "URI host")
6565
}
6666

67+
/**
68+
* @return a 'raw' (percent-encoded) query string that does not contain invalid characters.
69+
*/
70+
def parseRawQueryString(): String = {
71+
rule(rawQueryString ~ EOI).run() match {
72+
case Right(()) => parseSafeRawQueryString(sb.toString)
73+
case Left(error) => fail(error, "rawQueryString")
74+
}
75+
}
76+
77+
/**
78+
* @param rawQueryString 'raw' (percent-encoded) query string that in Relaxed mode may contain characters not allowed
79+
* by https://tools.ietf.org/html/rfc3986#section-3.4 but is guaranteed not to have invalid percent-encoded characters
80+
* @return a 'raw' (percent-encoded) query string that does not contain invalid characters.
81+
*/
82+
def parseSafeRawQueryString(rawQueryString: String): String = uriParsingMode match {
83+
case Uri.ParsingMode.Strict =>
84+
// Cannot contain invalid characters in strict mode
85+
rawQueryString
86+
case Uri.ParsingMode.Relaxed =>
87+
// Percent-encode invalid characters
88+
UriRendering.encode(new StringRendering, rawQueryString, uriParsingCharset, `query-fragment-char` ++ '%', false).get
89+
}
90+
6791
def parseQuery(): Query =
6892
rule(query ~ EOI).run() match {
6993
case Right(query) => query
@@ -85,6 +109,10 @@ private[http] final class UriParser(
85109
case Uri.ParsingMode.Strict => `pchar-base`
86110
case _ => `relaxed-path-segment-char`
87111
}
112+
private[this] val `query-char` = uriParsingMode match {
113+
case Uri.ParsingMode.Strict => `query-fragment-char`
114+
case _ => `relaxed-query-char`
115+
}
88116
private[this] val `query-key-char` = uriParsingMode match {
89117
case Uri.ParsingMode.Strict => `strict-query-key-char`
90118
case Uri.ParsingMode.Relaxed => `relaxed-query-key-char`
@@ -103,6 +131,10 @@ private[http] final class UriParser(
103131
private[this] var _host: Host = Host.Empty
104132
private[this] var _port: Int = 0
105133
private[this] var _path: Path = Path.Empty
134+
/**
135+
* Percent-encoded. When in 'relaxed' mode, characters not permitted by https://tools.ietf.org/html/rfc3986#section-3.4
136+
* are already automatically percent-encoded here
137+
*/
106138
private[this] var _rawQueryString: Option[String] = None
107139
private[this] var _fragment: Option[String] = None
108140

@@ -111,7 +143,7 @@ private[http] final class UriParser(
111143
private[this] def setHost(host: Host): Unit = _host = host
112144
private[this] def setPort(port: Int): Unit = _port = port
113145
private[this] def setPath(path: Path): Unit = _path = path
114-
private[this] def setRawQueryString(rawQueryString: String): Unit = _rawQueryString = Some(rawQueryString)
146+
private[this] def setRawQueryString(rawQueryString: String): Unit = _rawQueryString = Some(parseSafeRawQueryString(rawQueryString))
115147
private[this] def setFragment(fragment: String): Unit = _fragment = Some(fragment)
116148

117149
// http://tools.ietf.org/html/rfc3986#appendix-A
@@ -194,7 +226,7 @@ private[http] final class UriParser(
194226
def pchar = rule { `path-segment-char` ~ appendSB() | `pct-encoded` }
195227

196228
def rawQueryString = rule {
197-
clearSB() ~ oneOrMore(`raw-query-char` ~ appendSB()) ~ run(setRawQueryString(sb.toString)) | run(setRawQueryString(""))
229+
clearSB() ~ oneOrMore(`query-char` ~ appendSB() | `pct-encoded`) ~ run(setRawQueryString(sb.toString)) | run(setRawQueryString(""))
198230
}
199231

200232
// http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1
@@ -262,7 +294,7 @@ private[http] final class UriParser(
262294
rule(`request-target` ~ EOI).run() match {
263295
case Right(_) =>
264296
val path = if (_scheme.isEmpty) _path else collapseDotSegments(_path)
265-
create(_scheme, _userinfo, _host, _port, path, _rawQueryString, _fragment)
297+
createUnsafe(_scheme, Authority(_host, _port, _userinfo), path, _rawQueryString, _fragment)
266298
case Left(error) => fail(error, "request-target")
267299
}
268300

@@ -283,6 +315,10 @@ private[http] final class UriParser(
283315
`absolute-path` ~ optional('?' ~ rawQueryString) // origin-form
284316
) // TODO: asterisk-form
285317

318+
/**
319+
* @return path and percent-encoded query string. When in 'relaxed' mode, characters not permitted by https://tools.ietf.org/html/rfc3986#section-3.4
320+
* are already automatically percent-encoded here
321+
*/
286322
def parseHttp2PathPseudoHeader(): (Uri.Path, Option[String]) =
287323
rule(`http2-path-pseudo-header` ~ EOI).run() match {
288324
case Right(_) =>
@@ -309,7 +345,6 @@ private[http] final class UriParser(
309345

310346
private def createUriReference(): Uri = {
311347
val path = if (_scheme.isEmpty) _path else collapseDotSegments(_path)
312-
create(_scheme, _userinfo, _host, normalizePort(_port, _scheme), path, _rawQueryString, _fragment)
348+
createUnsafe(_scheme, Authority(_host, normalizePort(_port, _scheme), _userinfo), path, _rawQueryString, _fragment)
313349
}
314350
}
315-

0 commit comments

Comments
 (0)