Skip to content

Commit

Permalink
Add support for JSON sequence MIME type
Browse files Browse the repository at this point in the history
Per draft-ietf-json-text-sequence-07 (which soon will be published as an
RFC).
  • Loading branch information
nicowilliams committed Oct 12, 2014
1 parent 3411167 commit 89791a0
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 21 deletions.
11 changes: 11 additions & 0 deletions docs/content/3.manual/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ sections:
Output the jq version and exit with zero.
* `--seq`:
Use the `application/json-seq` MIME type scheme for separating
JSON texts in jq's input and output. This means that an ASCII
RS (record separator) character is printed before each value on
output and an ASCII LF (line feed) is printed after every
output. Input JSON texts that fail to parse are ignored (but
warned about), discarding all subsequent input until the next
RS. This mode also parses the output of jq without the `--seq`
option.
* `--slurp`/`-s`:
Instead of running the filter for each JSON object in the
Expand Down
2 changes: 2 additions & 0 deletions jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ void jv_dump(jv, int flags);
void jv_show(jv, int flags);
jv jv_dump_string(jv, int flags);

/* Flag for jv_parser_new(): treat the input as an application/json-seq
 * stream.  With this flag set the parser, after a parse error, skips input
 * until the next ASCII RS, and it rejects a top-level number that is not
 * followed by whitespace as potentially truncated. */
#define JV_PARSE_SEQ 1

jv jv_parse(const char* string);
jv jv_parse_sized(const char* string, int length);

Expand Down
72 changes: 63 additions & 9 deletions jv_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ struct jv_parser {
int curr_buf_is_partial;
unsigned bom_strip_position;

int flags;

jv* stack;
int stackpos;
int stacklen;
Expand All @@ -40,12 +42,15 @@ struct jv_parser {
enum {
JV_PARSER_NORMAL,
JV_PARSER_STRING,
JV_PARSER_STRING_ESCAPE
JV_PARSER_STRING_ESCAPE,
JV_PARSER_WAITING_FOR_RS // parse error, waiting for RS
} st;
unsigned int last_ch_was_ws:1;
};


static void parser_init(struct jv_parser* p) {
p->flags = 0;
p->stack = 0;
p->stacklen = p->stackpos = 0;
p->next = jv_invalid();
Expand All @@ -60,10 +65,18 @@ static void parser_init(struct jv_parser* p) {
jvp_dtoa_context_init(&p->dtoa);
}

static void parser_free(struct jv_parser* p) {
/*
 * Discard all in-progress parse state so the parser can start on a fresh
 * JSON text.  The pending value and every value still on the stack are
 * released; the stack and token buffers themselves are kept for reuse
 * (parser_free() is what frees them).
 */
static void parser_reset(struct jv_parser* p) {
  /* Drop whatever value was pending in p->next. */
  jv_free(p->next);
  p->next = jv_invalid();

  /* Release each value still held on the stack, bottom to top. */
  const int depth = p->stackpos;
  int slot = 0;
  while (slot < depth) {
    jv_free(p->stack[slot]);
    slot++;
  }

  /* Back to an empty stack, an empty token and the normal scanning state. */
  p->stackpos = 0;
  p->tokenpos = 0;
  p->st = JV_PARSER_NORMAL;
}

static void parser_free(struct jv_parser* p) {
parser_reset(p);
jv_mem_free(p->stack);
jv_mem_free(p->tokenbuf);
jvp_dtoa_context_free(&p->dtoa);
Expand Down Expand Up @@ -330,9 +343,26 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
p->line++;
p->column = 0;
}
if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) {
TRY(check_literal(p));
if (p->st == JV_PARSER_NORMAL && check_done(p, out)) {
if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(*out) == JV_KIND_NUMBER) {
jv_free(*out);
*out = jv_invalid();
return "Potentially truncated top-level numeric value";
}
return OK;
}
parser_reset(p);
*out = jv_invalid();
return "Truncated value";
}
presult answer = 0;
p->last_ch_was_ws = 0;
if (p->st == JV_PARSER_NORMAL) {
chclass cls = classify(ch);
if (cls == WHITESPACE)
p->last_ch_was_ws = 1;
if (cls != LITERAL) {
TRY(check_literal(p));
if (check_done(p, out)) answer = OK;
Expand Down Expand Up @@ -373,6 +403,7 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
/*
 * Allocate a parser, put it in its initial state and record the caller's
 * option flags (e.g. JV_PARSE_SEQ) on it.
 *
 * parser_init() zeroes the flags field, so the flags are stored only after
 * initialisation.  Returns the newly allocated parser.
 */
struct jv_parser* jv_parser_new(int flags) {
  struct jv_parser* parser = jv_mem_alloc(sizeof(struct jv_parser));

  parser_init(parser);
  parser->flags = flags;

  return parser;
}

Expand Down Expand Up @@ -412,14 +443,22 @@ jv jv_parser_next(struct jv_parser* p) {
assert(p->curr_buf && "a buffer must be provided");
if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM"));
jv value;
char ch;
presult msg = 0;
while (!msg && p->curr_buf_pos < p->curr_buf_length) {
char ch = p->curr_buf[p->curr_buf_pos++];
ch = p->curr_buf[p->curr_buf_pos++];
if (ch != '\036' && p->st == JV_PARSER_WAITING_FOR_RS)
continue; // need to resync, wait for RS
msg = scan(p, ch, &value);
}
if (msg == OK) {
return value;
} else if (msg) {
parser_reset(p);
if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) {
p->st = JV_PARSER_WAITING_FOR_RS;
return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d (need RS to resync)", msg, p->line, p->column));
}
return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d", msg, p->line, p->column));
} else if (p->curr_buf_is_partial) {
assert(p->curr_buf_pos == p->curr_buf_length);
Expand All @@ -428,16 +467,31 @@ jv jv_parser_next(struct jv_parser* p) {
} else {
assert(p->curr_buf_pos == p->curr_buf_length);
// at EOF
if (p->st != JV_PARSER_NORMAL)
return jv_invalid_with_msg(jv_string("Unfinished string"));
if ((msg = check_literal(p)))
return jv_invalid_with_msg(jv_string(msg));
if (p->stackpos != 0)
return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
if (p->st != JV_PARSER_WAITING_FOR_RS) {
if (p->st != JV_PARSER_NORMAL) {
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
return jv_invalid_with_msg(jv_string("Unfinished string"));
}
if ((msg = check_literal(p))) {
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
return jv_invalid_with_msg(jv_string(msg));
}
if (p->stackpos != 0) {
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
}
}
// p->next is either invalid (nothing here but no syntax error)
// or valid (this is the value). either way it's the thing to return
value = p->next;
p->next = jv_invalid();
if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) {
jv_free(value);
return jv_invalid_with_msg(jv_string("Potentially truncated top-level numeric value"));
}
return value;
}
}
Expand Down
37 changes: 25 additions & 12 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ enum {
UNBUFFERED_OUTPUT = 2048,
EXIT_STATUS = 4096,
IN_PLACE = 8192,
SEQ = 16384,
/* debugging only */
DUMP_DISASM = 16384,
DUMP_DISASM = 32768,
};
static int options = 0;

Expand Down Expand Up @@ -122,6 +123,8 @@ static int process(jq_state *jq, jv value, int flags) {
ret = 11;
else
ret = 0;
if (options & SEQ)
fwrite("\036", 1, 1, stdout);
jv_dump(result, dumpopts);
}
if (!(options & RAW_NO_LF))
Expand Down Expand Up @@ -284,6 +287,10 @@ int main(int argc, char* argv[]) {
options |= IN_PLACE;
if (!short_opts) continue;
}
if (isoption(argv[i], 0, "seq", &short_opts)) {
options |= SEQ;
if (!short_opts) continue;
}
if (isoption(argv[i], 'e', "exit-status", &short_opts)) {
options |= EXIT_STATUS;
if (!short_opts) continue;
Expand Down Expand Up @@ -444,7 +451,7 @@ int main(int argc, char* argv[]) {
slurped = jv_array();
}
}
struct jv_parser* parser = jv_parser_new(0);
struct jv_parser* parser = jv_parser_new((options & SEQ) ? JV_PARSE_SEQ : 0);
char buf[4096];
int is_last = 0;
while (read_more(buf, sizeof(buf), &is_last)) {
Expand All @@ -461,22 +468,28 @@ int main(int argc, char* argv[]) {
} else {
jv_parser_set_buf(parser, buf, strlen(buf), !is_last);
jv value;
while (jv_is_valid((value = jv_parser_next(parser)))) {
while (jv_is_valid(value = jv_parser_next(parser)) || jv_invalid_has_msg(jv_copy(value))) {
if (!jv_is_valid(value)) {
jv msg = jv_invalid_get_msg(value);
if (!(options & SEQ)) {
// We used to treat parse errors as fatal...
ret = 4;
fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
jv_free(msg);
break;
}
fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg));
jv_free(msg);
// ...but with --seq we attempt to recover.
continue;
}
if (options & SLURP) {
slurped = jv_array_append(slurped, value);
} else {
ret = process(jq, value, jq_flags);
value = jv_invalid();
}
}
if (jv_invalid_has_msg(jv_copy(value))) {
jv msg = jv_invalid_get_msg(value);
fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
jv_free(msg);
ret = 4;
break;
} else {
jv_free(value);
}
}
}
jv_parser_free(parser);
Expand Down
40 changes: 40 additions & 0 deletions tests/run
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,46 @@ case "$v" in
*) true;;
esac

## Test JSON sequence support
#
# Each case below writes the expected stderr diagnostics to $d/expected,
# feeds jq a hand-built input with --seq, and compares what jq actually
# reported against the expectation with cmp.  \036 in the printf strings is
# the ASCII RS (record separator) character.

# Truncated texts (a bare number cut off by RS, an unclosed array, an
# unclosed object, an unterminated string) must each produce an
# "ignoring parse error" line; stdout is discarded, only stderr is compared.
cat > $d/expected <<EOF
ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
ignoring parse error: Truncated value at line 2, column 5
ignoring parse error: Truncated value at line 2, column 25
ignoring parse error: Truncated value at line 2, column 41
EOF
printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"\036false\n'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},false]' > /dev/null 2> $d/out
cmp $d/out $d/expected

# Variant of the previous case: the last malformed text is not immediately
# followed by RS, so the final diagnostic is an "Invalid literal" error
# reported on line 3 instead of a "Truncated value" error.
cat > $d/expected <<EOF
ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
ignoring parse error: Truncated value at line 2, column 5
ignoring parse error: Truncated value at line 2, column 25
ignoring parse error: Invalid literal at line 3, column 1
EOF
printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"false\n\036null'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},null]' > /dev/null 2> $d/out
cmp $d/out $d/expected

# Note that here jq sees no inputs at all but it still succeeds because
# --seq ignores parse errors
cat > $d/expected <<EOF
ignoring parse error: Unfinished string
EOF
printf '"foo'|./jq -ce --seq . > $d/out 2>&1
cmp $d/out $d/expected

# Numeric values truncated by EOF are ignored
cat > $d/expected <<EOF
ignoring parse error: Potentially truncated top-level numeric value
EOF
printf '1'|./jq -ce --seq . > $d/out 2>&1
cmp $d/out $d/expected

# A number that is followed by a newline is accepted: no diagnostics are
# expected on stderr.
cat > $d/expected <<EOF
EOF
printf '1\n'|./jq -ces --seq '. == [1]' >/dev/null 2> $d/out
cmp $d/out $d/expected

## Test library/module system

mods=$PWD/tests/modules
Expand Down

0 comments on commit 89791a0

Please sign in to comment.