Skip to content

Commit

Permalink
JSON support, bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
ericbusboom committed Nov 14, 2017
1 parent 4c3987b commit f48d210
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 5 deletions.
5 changes: 4 additions & 1 deletion rowpipe/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,10 @@ def rewrite_tg(env, tg_name, code):
visitor = ReplaceTG(env, tg_name)
assert visitor.tg_name

tree = visitor.visit(ast.parse(code))
try:
tree = visitor.visit(ast.parse(code))
except SyntaxError as e:
raise SyntaxError(str(e)+"\nIn code: \n"+code)

if visitor.loc:
loc = ' #' + visitor.loc
Expand Down
109 changes: 109 additions & 0 deletions rowpipe/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
# MIT License, included in this distribution as LICENSE.txt

"""
Create row processor for generating JSON
The format for each line is a sequence of path elements, with a terminal at the end. For instance:
a.b.c.t
Assume there is a path specification for each column in the table, and that the column name is substituted
for a missing path specification.
A path element without a '[]' specifies a key to an object, and a path element with a '[]' specifies an array.
A path element with a '[-]' specifies that the value should be set on the last item of the existing list.
* "a: v": Create Key a, set to v
* "a.b: v": Create Key a, set to new object. Create key b, set to v
* "a.b[]: v" Create key a, set to new object. Create key b, set to new list. Append v
* "a[].b: v" Create key a, set to new list. Create new object. Create key b, set to v
The "[-]" means to use the last element of the existing list.
Types of path elements:
a: terminal, object key
a.: nonterminal, object key and new object
a[]: terminal, add to new or existing list
a[].: nonterminal, new array with new object
a[-].: nonterminal, last object in list
"""

import json

def parse_path(path):
    """Split a dotted path specification into its path elements.

    Each element of ``path`` (separated by '.') becomes a three item list
    ``[key, kind, is_terminal]`` where ``kind`` is:

    * ``'an'`` -- the element ends with '[]' (array, new item)
    * ``'al'`` -- the element ends with '[-]' (array, last item)
    * ``'o'``  -- anything else (object key)

    ``is_terminal`` is True only for the final element of the path.

    :param path: Path specification string, e.g. ``'a.b[].c'``.
    :return: List of ``[key, kind, is_terminal]`` lists, in path order.
    :raises ValueError: If an array element contains more than one '['.
    """

    parts = []

    for element in path.split('.'):
        if element.endswith('[]'):
            key, kind = element[:-2], 'an'  # array, new
        elif element.endswith('[-]'):
            key, kind = element[:-3], 'al'  # array, last
        else:
            key, kind = element, 'o'  # object

        # An array element with a second '[' used to fail while unpacking the
        # result of split('['); keep the ValueError but say what is wrong.
        if kind != 'o' and '[' in key:
            raise ValueError(
                "Malformed path element '{}' in '{}': more than one '['".format(element, path))

        # Lists, not tuples, so the terminal flag can be set afterwards.
        parts.append([key, kind, False])

    if parts:
        parts[-1][2] = True  # The last item is the terminal

    return parts

def add_to_struct(s, path, v):
    """Set the value ``v`` in the nested structure ``s`` at the location ``path``.

    The path is parsed with ``parse_path`` and walked from the root ``s``.
    Intermediate objects and lists are created as required:

    * ``key.``    -- descend into the object at ``key``, creating it if missing
    * ``key[].``  -- append a new object to the list at ``key`` and descend into it
    * ``key[-].`` -- descend into the last item of the existing list at ``key``
    * ``key``     -- terminal, set ``key`` to ``v``
    * ``key[]``   -- terminal, append ``v`` to the list at ``key``

    :param s: Dict that is modified in place.
    :param path: Path specification string, e.g. ``'attr[-].value'``.
    :param v: Value to store.
    :return: None
    :raises ValueError: If the path ends with a '[-]' element. That form has no
        defined meaning and was previously dropped silently.
    :raises Exception: If a nonterminal '[-]' element names a list that does
        not exist.
    """

    path_parts = parse_path(path)

    # Reject an unsupported terminal before touching ``s`` so that a bad path
    # does not leave half-built intermediate containers behind.
    if path_parts and path_parts[-1][1] == 'al':
        raise ValueError(
            "Path '{}' ends with a '[-]' element, which can not be a terminal".format(path))

    o = s

    for key, kind, is_terminal in path_parts:

        if kind == 'an':
            items = o.setdefault(key, [])

            if is_terminal:
                items.append(v)
            else:
                # Every nonterminal '[]' starts a new object in the list.
                items.append({})
                o = items[-1]

        elif kind == 'al':
            if key not in o:
                raise Exception("Expected list '{}' to exist in '{}' ".format(key, path))

            o = o[key][-1]

        elif kind == 'o':
            if is_terminal:
                o[key] = v
            else:
                if key not in o:
                    o[key] = {}
                o = o[key]

        else:
            # parse_path only produces the three kinds handled above.
            raise ValueError("Unknown path element kind '{}' in '{}'".format(kind, path))

class VTEncoder(json.JSONEncoder):
    """JSON encoder that serializes rowpipe date and time value types as strings."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Instances of DateTimeVT, DateVT and TimeVT are converted with ``str()``.
        Everything else is handed to the base class, which raises TypeError.
        """
        # Imported here rather than at module level, as in the original code.
        from rowpipe.valuetype import DateTimeVT, DateVT, TimeVT

        temporal_types = (DateTimeVT, DateVT, TimeVT)

        if not isinstance(obj, temporal_types):
            # Let the base class default method raise the TypeError
            return json.JSONEncoder.default(self, obj)

        return str(obj)
14 changes: 12 additions & 2 deletions rowpipe/valuetype/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,12 @@ def __new__(cls, v):
return NoneValue

try:
d = parser.parse(v)
if isinstance(v, time):
d = v
elif not isinstance(v, str):
d = parser.parse(str(v))
else:
d = parser.parse(v)

return super(TimeValue, cls).__new__(cls, d.hour, d.minute, d.second)
except TypeError:
Expand All @@ -450,7 +455,12 @@ def __new__(cls, v):
return NoneValue

try:
d = parser.parse(v)
if isinstance(v, datetime):
d = v
elif not isinstance(v, str):
d = parser.parse(str(v))
else:
d = parser.parse(v)
return super(DateTimeValue, cls).__new__(cls, d.year, d.month, d.day, d.hour, d.minute, d.second)

except TypeError:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

setup(
name='rowpipe',
version='0.1.8',
version='0.2.0',
description='Generate row data from a variety of file formats',
long_description=readme,
packages=find_packages(),
Expand All @@ -46,7 +46,7 @@
'rowgenerators'],
author="Eric Busboom",
author_email='eric@civicknowledge.com',
url='https://github.com/CivicKnowledge/rowgenerator.git',
url='https://github.com/Metatab/rowgenerator.git',
license='MIT',
classifiers=classifiers
)
52 changes: 52 additions & 0 deletions test/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from __future__ import print_function

import unittest
from rowpipe.json import add_to_struct, VTEncoder

class TestJson(unittest.TestCase):
    """Tests for building nested JSON structures from path specifications."""

    def test_basic(self):
        """Build a structure covering each path form and check the result."""
        import json

        d = {}

        add_to_struct(d, 'a', 1)
        add_to_struct(d, 'b', 2)
        add_to_struct(d, 'c.a', 1)
        add_to_struct(d, 'c.b', 1)
        add_to_struct(d, 'd[]', 1)
        add_to_struct(d, 'd[]', 2)
        add_to_struct(d, 'e[].a', 10)
        add_to_struct(d, 'e[].b', 11)
        add_to_struct(d, 'f[].a[]', 20)
        add_to_struct(d, 'f[].a[]', 21)
        add_to_struct(d, 'f[].b[]', 30)
        add_to_struct(d, 'f[].b[]', 31)
        add_to_struct(d, 'attr[].key', 'k1')
        add_to_struct(d, 'attr[-].value', 'v1')
        add_to_struct(d, 'attr[].key', 'k2')
        add_to_struct(d, 'attr[-].value', 'v2')

        print(json.dumps(d, indent=4))

        # A nonterminal '[]' always appends a new object, so 'e' and 'f' get
        # one item per call, while '[-]' adds to the most recent 'attr' item.
        expected = {
            'a': 1,
            'b': 2,
            'c': {'a': 1, 'b': 1},
            'd': [1, 2],
            'e': [{'a': 10}, {'b': 11}],
            'f': [{'a': [20]}, {'a': [21]}, {'b': [30]}, {'b': [31]}],
            'attr': [
                {'key': 'k1', 'value': 'v1'},
                {'key': 'k2', 'value': 'v2'},
            ],
        }

        self.assertEqual(expected, d)

    def test_table(self):
        """Convert the first rows of a local data package resource to JSON.

        Skipped unless both the optional ``metapack`` package and the
        developer's local data package are available.
        """
        import json
        import os
        from itertools import islice

        try:
            from metapack import open_package
        except ImportError:
            self.skipTest('metapack is not installed')

        u = '/Volumes/Storage/proj/virt/data-projects/client-boston-college/bc.edu-dataconv_poc/_packages/bc.edu-dataconv_poc-1/'

        # The package only exists on the original developer's machine.
        if not os.path.exists(u):
            self.skipTest('Test data package does not exist: {}'.format(u))

        pkg = open_package(u)
        r = pkg.resource('comments')

        # NOTE(review): c.get('json') is None for a column without a 'json'
        # entry, which add_to_struct can not parse -- confirm every column
        # of this resource declares one.
        json_headers = [(c['pos'], c.get('json')) for c in r.columns()]

        for row in islice(r, None, 10):
            d = {}
            for pos, jh in json_headers:
                add_to_struct(d, jh, row[pos])

            print(json.dumps(d, indent=4, cls=VTEncoder))

if __name__ == '__main__':
unittest.main()

0 comments on commit f48d210

Please sign in to comment.