diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml index a89feb678b..31c7b2f064 100644 --- a/docs/content/3.manual/manual.yml +++ b/docs/content/3.manual/manual.yml @@ -92,6 +92,17 @@ sections: Output the jq version and exit with zero. + * `--seq`: + + Use the `application/json-seq` MIME type scheme for separating + JSON texts in jq's input and output. This means that an ASCII + RS (record separator) character is printed before each value on + output and an ASCII LF (line feed) is printed after every + output. Input JSON texts that fail to parse are ignored (but + warned about), discarding all subsequent input until the next + RS. This mode also parses the output of jq without the `--seq` + option. + * `--slurp`/`-s`: Instead of running the filter for each JSON object in the diff --git a/jv.h b/jv.h index 465070ad0c..08b89aec44 100644 --- a/jv.h +++ b/jv.h @@ -156,6 +156,8 @@ void jv_dump(jv, int flags); void jv_show(jv, int flags); jv jv_dump_string(jv, int flags); +#define JV_PARSE_SEQ 1 + jv jv_parse(const char* string); jv jv_parse_sized(const char* string, int length); diff --git a/jv_parse.c b/jv_parse.c index 5b703fd712..e534e93bb5 100644 --- a/jv_parse.c +++ b/jv_parse.c @@ -24,6 +24,8 @@ struct jv_parser { int curr_buf_is_partial; unsigned bom_strip_position; + int flags; + jv* stack; int stackpos; int stacklen; @@ -40,12 +42,15 @@ struct jv_parser { enum { JV_PARSER_NORMAL, JV_PARSER_STRING, - JV_PARSER_STRING_ESCAPE + JV_PARSER_STRING_ESCAPE, + JV_PARSER_WAITING_FOR_RS // parse error, waiting for RS } st; + unsigned int last_ch_was_ws:1; }; static void parser_init(struct jv_parser* p) { + p->flags = 0; p->stack = 0; p->stacklen = p->stackpos = 0; p->next = jv_invalid(); @@ -60,10 +65,18 @@ static void parser_init(struct jv_parser* p) { jvp_dtoa_context_init(&p->dtoa); } -static void parser_free(struct jv_parser* p) { +static void parser_reset(struct jv_parser* p) { jv_free(p->next); + p->next = jv_invalid(); for (int i=0; 
istackpos; i++) jv_free(p->stack[i]); + p->stackpos = 0; + p->tokenpos = 0; + p->st = JV_PARSER_NORMAL; +} + +static void parser_free(struct jv_parser* p) { + parser_reset(p); jv_mem_free(p->stack); jv_mem_free(p->tokenbuf); jvp_dtoa_context_free(&p->dtoa); @@ -330,9 +343,26 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) { p->line++; p->column = 0; } + if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) { + TRY(check_literal(p)); + if (p->st == JV_PARSER_NORMAL && check_done(p, out)) { + if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(*out) == JV_KIND_NUMBER) { + jv_free(*out); + *out = jv_invalid(); + return "Potentially truncated top-level numeric value"; + } + return OK; + } + parser_reset(p); + *out = jv_invalid(); + return "Truncated value"; + } presult answer = 0; + p->last_ch_was_ws = 0; if (p->st == JV_PARSER_NORMAL) { chclass cls = classify(ch); + if (cls == WHITESPACE) + p->last_ch_was_ws = 1; if (cls != LITERAL) { TRY(check_literal(p)); if (check_done(p, out)) answer = OK; @@ -373,6 +403,7 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) { struct jv_parser* jv_parser_new(int flags) { struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser)); parser_init(p); + p->flags = flags; return p; } @@ -412,14 +443,22 @@ jv jv_parser_next(struct jv_parser* p) { assert(p->curr_buf && "a buffer must be provided"); if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM")); jv value; + char ch; presult msg = 0; while (!msg && p->curr_buf_pos < p->curr_buf_length) { - char ch = p->curr_buf[p->curr_buf_pos++]; + ch = p->curr_buf[p->curr_buf_pos++]; + if (ch != '\036' && p->st == JV_PARSER_WAITING_FOR_RS) + continue; // need to resync, wait for RS msg = scan(p, ch, &value); } if (msg == OK) { return value; } else if (msg) { + parser_reset(p); + if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) { + p->st = JV_PARSER_WAITING_FOR_RS; + return jv_invalid_with_msg(jv_string_fmt("%s at line 
%d, column %d (need RS to resync)", msg, p->line, p->column)); + } return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d", msg, p->line, p->column)); } else if (p->curr_buf_is_partial) { assert(p->curr_buf_pos == p->curr_buf_length); @@ -428,16 +467,31 @@ jv jv_parser_next(struct jv_parser* p) { } else { assert(p->curr_buf_pos == p->curr_buf_length); // at EOF - if (p->st != JV_PARSER_NORMAL) - return jv_invalid_with_msg(jv_string("Unfinished string")); - if ((msg = check_literal(p))) - return jv_invalid_with_msg(jv_string(msg)); - if (p->stackpos != 0) - return jv_invalid_with_msg(jv_string("Unfinished JSON term")); + if (p->st != JV_PARSER_WAITING_FOR_RS) { + if (p->st != JV_PARSER_NORMAL) { + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return jv_invalid_with_msg(jv_string("Unfinished string")); + } + if ((msg = check_literal(p))) { + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return jv_invalid_with_msg(jv_string(msg)); + } + if (p->stackpos != 0) { + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return jv_invalid_with_msg(jv_string("Unfinished JSON term")); + } + } // p->next is either invalid (nothing here but no syntax error) // or valid (this is the value). 
either way it's the thing to return value = p->next; p->next = jv_invalid(); + if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) { + jv_free(value); + return jv_invalid_with_msg(jv_string("Potentially truncated top-level numeric value")); + } return value; } } diff --git a/main.c b/main.c index 335ca7b2ab..8ebdb9fc0f 100644 --- a/main.c +++ b/main.c @@ -90,8 +90,9 @@ enum { UNBUFFERED_OUTPUT = 2048, EXIT_STATUS = 4096, IN_PLACE = 8192, + SEQ = 16384, /* debugging only */ - DUMP_DISASM = 16384, + DUMP_DISASM = 32768, }; static int options = 0; @@ -122,6 +123,8 @@ static int process(jq_state *jq, jv value, int flags) { ret = 11; else ret = 0; + if (options & SEQ) + fwrite("\036", 1, 1, stdout); jv_dump(result, dumpopts); } if (!(options & RAW_NO_LF)) @@ -284,6 +287,10 @@ int main(int argc, char* argv[]) { options |= IN_PLACE; if (!short_opts) continue; } + if (isoption(argv[i], 0, "seq", &short_opts)) { + options |= SEQ; + if (!short_opts) continue; + } if (isoption(argv[i], 'e', "exit-status", &short_opts)) { options |= EXIT_STATUS; if (!short_opts) continue; @@ -444,7 +451,7 @@ int main(int argc, char* argv[]) { slurped = jv_array(); } } - struct jv_parser* parser = jv_parser_new(0); + struct jv_parser* parser = jv_parser_new((options & SEQ) ? JV_PARSE_SEQ : 0); char buf[4096]; int is_last = 0; while (read_more(buf, sizeof(buf), &is_last)) { @@ -461,22 +468,28 @@ int main(int argc, char* argv[]) { } else { jv_parser_set_buf(parser, buf, strlen(buf), !is_last); jv value; - while (jv_is_valid((value = jv_parser_next(parser)))) { + while (jv_is_valid(value = jv_parser_next(parser)) || jv_invalid_has_msg(jv_copy(value))) { + if (!jv_is_valid(value)) { + jv msg = jv_invalid_get_msg(value); + if (!(options & SEQ)) { + // We used to treat parse errors as fatal... 
+ ret = 4; + fprintf(stderr, "parse error: %s\n", jv_string_value(msg)); + jv_free(msg); + break; + } + fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg)); + jv_free(msg); + // ...but with --seq we attempt to recover. + continue; + } if (options & SLURP) { slurped = jv_array_append(slurped, value); } else { ret = process(jq, value, jq_flags); + value = jv_invalid(); } } - if (jv_invalid_has_msg(jv_copy(value))) { - jv msg = jv_invalid_get_msg(value); - fprintf(stderr, "parse error: %s\n", jv_string_value(msg)); - jv_free(msg); - ret = 4; - break; - } else { - jv_free(value); - } } } jv_parser_free(parser); diff --git a/tests/run b/tests/run index 0b5fcafcb0..252ad73227 100755 --- a/tests/run +++ b/tests/run @@ -78,6 +78,46 @@ case "$v" in *) true;; esac +## Test JSON sequence support + +cat > $d/expected < /dev/null 2> $d/out +cmp $d/out $d/expected + +cat > $d/expected < /dev/null 2> $d/out +cmp $d/out $d/expected + +# Note that here jq sees no inputs at all but it still succeeds because +# --seq ignores parse errors +cat > $d/expected < $d/out 2>&1 +cmp $d/out $d/expected + +# Numeric values truncated by EOF are ignored +cat > $d/expected < $d/out 2>&1 +cmp $d/out $d/expected + +cat > $d/expected </dev/null 2> $d/out +cmp $d/out $d/expected + ## Test library/module system mods=$PWD/tests/modules