-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4c3987b
commit f48d210
Showing
5 changed files
with
179 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the | ||
# MIT License, included in this distribution as LICENSE.txt | ||
|
||
""" | ||
Create row processor for generating JSON | ||
The format for each line is a sequence of path elements, with a terminal at the end. For instance: | ||
a.b.c.t | ||
Assume there is a path specification for each column in the table, and that the column name is substituted | ||
for a missing path specification. | ||
A path element without a '[]' specifies a key to an object, and a path element with a '[]' specifies an array. | ||
A path element with a '[-]' specifies that the value should be set on the same list item as the last path element. | ||
* "a: v": Create Key a, set to v | ||
* "a.b: v": Create Key a, set to new object. Create key b, set to v | ||
* "a.b[]: v" Create key a, set to new object. Create key b, set to new list. Append v | ||
* "a[].b: v" Create key a, set to new list. Create new object. Create key b, set to v | ||
The "[-]" means to use the last element of the existing list. | ||
Types of path elements: | ||
a: terminal, object key | ||
a.: nonterminal, object key and new object | ||
a[]: terminal, add to new or existing list | ||
a[].: nonterminal, new array with new object | ||
a[-].:nonterminal, last object in list | ||
""" | ||
|
||
import json | ||
|
||
def parse_path(path):
    """Split a dotted path specification into typed path elements.

    Each '.'-separated element of ``path`` becomes a three-item list
    ``[key, kind, is_terminal]`` where ``kind`` is one of:

    * ``'an'`` -- the element ends with ``[]``: array, add a new item
    * ``'al'`` -- the element ends with ``[-]``: array, use the last item
    * ``'o'``  -- anything else: plain object key

    ``is_terminal`` is True only for the final element of the path.

    :param path: path string such as ``'a.b[].c'``
    :return: list of ``[key, kind, is_terminal]`` lists, in path order
    """

    parts = []

    for element in path.split('.'):
        # Strip the marker by its known length instead of splitting on '[',
        # so a key that itself contains '[' does not fail tuple unpacking.
        if element.endswith('[]'):
            parts.append([element[:-2], 'an', False])  # array, new
        elif element.endswith('[-]'):
            parts.append([element[:-3], 'al', False])  # array, last
        else:
            parts.append([element, 'o', False])  # object

    if parts:
        parts[-1][2] = True  # The last item is the terminal

    return parts
|
||
def add_to_struct(s, path, v):
    """Store ``v`` inside the nested structure ``s`` at the place named by ``path``.

    The path is parsed with ``parse_path`` and walked element by element,
    creating intermediate objects and lists as needed:

    * ``a``      terminal: ``s['a'] = v``
    * ``a.``     non-terminal: descend into object ``a``, creating it if absent
    * ``a[]``    terminal: append ``v`` to list ``a``, creating it if absent
    * ``a[].``   non-terminal: append a new object to list ``a`` and descend into it
    * ``a[-].``  non-terminal: descend into the last item of the existing list ``a``

    :param s: mapping to modify in place
    :param path: path specification, e.g. ``'attr[-].value'``
    :param v: value to store
    :raises Exception: if a ``[-]`` element names a list that does not exist,
        if ``[-]`` is used as the terminal element (there is no defined
        meaning for it, and the value would otherwise be silently dropped),
        or if the parser yields an unknown element kind
    """

    o = s  # cursor: the container the next path element applies to

    # 'kind' rather than 'type', to avoid shadowing the builtin.
    for key, kind, is_terminal in parse_path(path):

        if kind == 'an':
            if key not in o:
                o[key] = []

            if is_terminal:
                o[key].append(v)
            else:
                # Each non-terminal '[]' starts a fresh object in the list.
                o[key].append({})
                o = o[key][-1]

        elif kind == 'al':
            if is_terminal:
                # Previously this only printed a debug marker and lost the value.
                raise Exception(
                    "Terminal '[-]' on '{}' in '{}' is not supported".format(key, path))

            if key not in o:
                raise Exception("Expected list '{}' to exist in '{}' ".format(key, path))

            o = o[key][-1]

        elif kind == 'o':
            if is_terminal:
                o[key] = v
            else:
                if key not in o:
                    o[key] = {}
                o = o[key]

        else:
            # Not reachable with the kinds parse_path currently emits; fail
            # loudly rather than print if that ever changes.
            raise Exception(
                "Unknown path element kind '{}' in '{}'".format(kind, path))
|
||
class VTEncoder(json.JSONEncoder):
    """JSON encoder that writes rowpipe date/time value types as strings."""

    def default(self, obj):
        """Return ``str(obj)`` for DateTimeVT, DateVT and TimeVT instances.

        Any other object is handed to ``json.JSONEncoder.default``, which
        raises ``TypeError``.
        """
        # Imported at call time rather than at module import time.
        from rowpipe.valuetype import DateTimeVT, DateVT, TimeVT

        stringified_types = (DateTimeVT, DateVT, TimeVT)

        if not isinstance(obj, stringified_types):
            # Let the base class default method raise the TypeError
            return json.JSONEncoder.default(self, obj)

        return str(obj)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from __future__ import print_function | ||
|
||
import unittest | ||
from rowpipe.json import add_to_struct, VTEncoder | ||
|
||
class TestJson(unittest.TestCase):
    """Tests for the path-driven JSON builder in ``rowpipe.json``."""

    def test_basic(self):
        """Apply one path of every supported form and check the exact result."""
        import json

        d = {}

        add_to_struct(d, 'a', 1)
        add_to_struct(d, 'b', 2)
        add_to_struct(d, 'c.a', 1)
        add_to_struct(d, 'c.b', 1)
        add_to_struct(d, 'd[]', 1)
        add_to_struct(d, 'd[]', 2)
        add_to_struct(d, 'e[].a', 10)
        add_to_struct(d, 'e[].b', 11)
        add_to_struct(d, 'f[].a[]', 20)
        add_to_struct(d, 'f[].a[]', 21)
        add_to_struct(d, 'f[].b[]', 30)
        add_to_struct(d, 'f[].b[]', 31)
        add_to_struct(d, 'attr[].key', 'k1')
        add_to_struct(d, 'attr[-].value', 'v1')
        add_to_struct(d, 'attr[].key', 'k2')
        add_to_struct(d, 'attr[-].value', 'v2')

        expected = {
            'a': 1,
            'b': 2,
            'c': {'a': 1, 'b': 1},
            'd': [1, 2],
            # A non-terminal '[]' appends a new object on every call.
            'e': [{'a': 10}, {'b': 11}],
            'f': [{'a': [20]}, {'a': [21]}, {'b': [30]}, {'b': [31]}],
            # '[-]' reuses the object created by the preceding '[]' call.
            'attr': [
                {'key': 'k1', 'value': 'v1'},
                {'key': 'k2', 'value': 'v2'},
            ],
        }

        self.assertEqual(expected, d)

        # The structure must also survive JSON serialization unchanged.
        self.assertEqual(expected, json.loads(json.dumps(d, indent=4)))

    def test_table(self):
        """Convert the first rows of a local data package resource to JSON.

        Relies on a package directory that only exists on the original
        developer's machine, so the test is skipped when that directory or
        the ``metapack`` dependency is unavailable.
        """
        import json
        import os
        from itertools import islice

        u = '/Volumes/Storage/proj/virt/data-projects/client-boston-college/bc.edu-dataconv_poc/_packages/bc.edu-dataconv_poc-1/'

        if not os.path.exists(u):
            self.skipTest("Local test package not available: {}".format(u))

        try:
            from metapack import open_package
        except ImportError:
            self.skipTest("metapack is not installed")

        pkg = open_package(u)
        r = pkg.resource('comments')

        # NOTE(review): c.get('json') is None for a column without a 'json'
        # entry, and add_to_struct would then fail on it -- confirm every
        # column of this resource defines one.
        json_headers = [(c['pos'], c.get('json')) for c in r.columns()]

        for row in islice(r, None, 10):
            d = {}
            for pos, jh in json_headers:
                add_to_struct(d, jh, row[pos])

            print(json.dumps(d, indent=4, cls=VTEncoder))
|
||
# Run the test suite when this module is executed directly as a script.
if '__main__' == __name__:
    unittest.main()