Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

common/json.c: Check that JSMN result is well-formed. #3761

Merged
merged 1 commit into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 230 additions & 1 deletion common/json.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,235 @@ const jsmntok_t *json_get_arr(const jsmntok_t tok[], size_t index)
return NULL;
}

/*-----------------------------------------------------------------------------
JSMN Result Validation Starts
-----------------------------------------------------------------------------*/
/*~ LIBJSMN is a fast, small JSON parsing library.
*
* "Fast, small" means it does not, in fact, do a
* lot of checking for invalid JSON.
*
* For example, by itself it would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
* Obviously those are not in any way valid JSON.
*
* This part of the code performs some filtering so
* that at least some of the invalid JSON that
* LIBJSMN accepts, will be rejected by
* json_parse_input.
*/

/*~ These functions are used in JSMN validation.
*
* The calling convention is that the "current" token
* is passed in as the first argument, and after the
* validator, is returned from the function.
*
* p = validate_jsmn_datum(p, end, valid);
*
* The reason has to do with typical C ABIs.
* Usually, the first few arguments are passed in via
* register, and the return value is also returned
* via register.
* This calling convention generally ensures that
* the current token pointer `p` is always in a
* register and is never forced into memory by the
* compiler.
*
* These functions are pre-declared here as they
* are interrecursive.
* Note that despite the recursion, `p` is only ever
* advanced, and there is only ever one `p` value,
* thus the overall algorithm is strict O(n)
* (*not* amortized) in time.
* The recursion does mean the algorithm is O(d)
* in memory (specifically stack frames), where d
* is the nestedness of objects in the input.
* This may become an issue later if we are in a
* stack-limited environment, such as if we actually
* went and used threads.
*/
/* Validate a *single* datum. */
static const jsmntok_t *
validate_jsmn_datum(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);
/*~ Validate a key-value pair.
*
* In JSMN, objects are not dictionaries.
* Instead, they are a sequence of datums.
*
* In fact, objects and arrays in JSMN are "the same",
* they only differ in delimiter characters.
*
* Of course, in "real" JSON, an object is a dictionary
* of key-value pairs.
*
* So what JSMN does is that the syntax "key": "value"
* is considered a *single* datum, a string "key"
* that contains a value "value".
*
* Indeed, JSMN accepts `["key": "value"]` as well as
* `{"item1", "item2"}`.
* The entire point of the validate_jsmn_result function
* is to reject such improper arrays and objects.
*/
static const jsmntok_t *
validate_jsmn_keyvalue(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);

static const jsmntok_t *
validate_jsmn_datum(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
int i;
int sz;

if (p >= end) {
*valid = false;
return p;
}

switch (p->type) {
case JSMN_UNDEFINED:
case JSMN_STRING:
case JSMN_PRIMITIVE:
/* These types should not have sub-datums. */
if (p->size != 0)
*valid = false;
else
++p;
break;

case JSMN_ARRAY:
/* Save the array size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Arrays should only contain standard JSON datums. */
p = validate_jsmn_datum(p, end, valid);
if (!*valid)
break;
}
break;

case JSMN_OBJECT:
/* Save the object size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Objects should only contain key-value pairs. */
p = validate_jsmn_keyvalue(p, end, valid);
if (!*valid)
break;
}
break;

default:
*valid = false;
break;
}

return p;
}
/* Key-value pairs *must* be strings with size 1. */
static inline const jsmntok_t *
validate_jsmn_keyvalue(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
if (p >= end) {
*valid = false;
return p;
}

/* Check key.
*
* JSMN parses the syntax `"key": "value"` as a
* JSMN_STRING of size 1, containing the value
* datum as a sub-datum.
*
* Thus, keys in JSON objects are really strings
* that "contain" the value, thus we check if
* the size is 1.
*
* JSMN supports a non-standard syntax such as
* `"key": 1 2 3 4`, which it considers as a
* string object that contains a sequence of
* sub-datums 1 2 3 4.
* The check below that p->size == 1 also
* incidentally rejects that non-standard
* JSON.
*/
if (p->type != JSMN_STRING || p->size != 1) {
*valid = false;
return p;
}

++p;
return validate_jsmn_datum(p, end, valid);
}

/** validate_jsmn_parse_output
*
* @brief Validates the result of jsmn_parse.
*
* @desc LIBJMSN is a small fast library, not a
* comprehensive library.
*
* This simply means that LIBJSMN will accept a
* *lot* of very strange text that is technically
* not JSON.
*
* For example, LIBJSMN would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
*
* This can lead to strange sequences of jsmntok_t
* objects.
* Unfortunately, most of our code assumes that
* the data fed into our JSON-RPC interface is
* valid JSON, and in particular is not invalid
* JSON that tickles LIBJSMN into emitting
* strange sequences of `jsmntok_t`.
*
* This function detects such possible problems
* and returns false if such an issue is found.
* If so, it is probably unsafe to pass the
* `jsmntok_t` generated by LIBJSMN to any other
* parts of our code.
*
* @param p - The first jsmntok_t token to process.
* This function does not assume that semantically
* only one JSON datum is processed; it does expect
* a sequence of complete JSON datums (which is
* what LIBJSMN *should* output).
* @param end - One past the end of jsmntok_t.
* Basically, this function is assured to read tokens
* starting at p up to end - 1.
* If p >= end, this will not validate anything and
* trivially return true.
*
* @return true if there appears to be no problem
* with the jsmntok_t sequence outputted by
* `jsmn_parse`, false otherwise.
*/
static bool
validate_jsmn_parse_output(const jsmntok_t *p, const jsmntok_t *end)
{
bool valid = true;

while (p < end && valid)
p = validate_jsmn_datum(p, end, &valid);

return valid;
}

/*-----------------------------------------------------------------------------
JSMN Result Validation Ends
-----------------------------------------------------------------------------*/

jsmntok_t *json_parse_input(const tal_t *ctx,
const char *input, int len, bool *valid)
{
Expand Down Expand Up @@ -338,7 +567,7 @@ jsmntok_t *json_parse_input(const tal_t *ctx,
ret = json_next(toks) - toks;

/* Cut to length and return. */
*valid = true;
*valid = validate_jsmn_parse_output(toks, toks + ret);
tal_resize(&toks, ret + 1);
/* Make sure last one is always referenceable. */
toks[ret].type = -1;
Expand Down
11 changes: 8 additions & 3 deletions common/test/run-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,20 +122,25 @@ static void test_json_tok_size(void)
assert(toks[0].size == 2);
assert(toks[1].size == 2);

buf = "{\"e1\" : {\"e2\", \"e3\"}}";
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}}";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(ok);
/* size only counts *direct* children */
assert(toks[0].size == 1);
assert(toks[2].size == 2);

buf = "{\"e1\" : {\"e2\", \"e3\"}, \"e4\" : {\"e5\", \"e6\"}}";
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}, \"e4\" : {\"e5\": 5, \"e6\": 6}}";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(ok);
/* size only counts *direct* children */
assert(toks[0].size == 2);
assert(toks[2].size == 2);
assert(toks[6].size == 2);
assert(toks[8].size == 2);

/* This should *not* parse! (used to give toks[0]->size == 3!) */
buf = "{ \"\" \"\" \"\" }";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(!ok);

/* This should *not* parse! (used to give toks[0]->size == 2!) */
buf = "{ 'satoshi', '546' }";
Expand Down