Skip to content

Commit

Permalink
[feat] port jsonpath from JSONLab, add loadurl for REST API
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Mar 31, 2024
1 parent fbf070c commit 2647fc9
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 5 deletions.
4 changes: 4 additions & 0 deletions jdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .jfile import (
load,
save,
loadurl,
show,
loadt,
savet,
Expand All @@ -48,11 +49,13 @@
jext,
)
from .jdata import encode, decode, jdtype, jsonfilter
from .jpath import jsonpath

__version__ = "0.5.5"
__all__ = [
"load",
"save",
"loadurl",
"show",
"loadt",
"savet",
Expand All @@ -67,6 +70,7 @@
"jdtype",
"jsonfilter",
"jext",
"jsonpath",
]
__license__ = """Apache license 2.0, Copyright (c) 2019-2024 Qianqian Fang"""

Expand Down
39 changes: 34 additions & 5 deletions jdata/jfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
__all__ = [
"load",
"save",
"loadurl",
"show",
"loadt",
"savet",
Expand Down Expand Up @@ -54,7 +55,7 @@ def load(fname, opt={}, **kwargs):
"""
if re.match("^https*://", fname):
newdata = downloadlink(fname, opt, **kwargs)
return newdata
return newdata[0]

spl = os.path.splitext(fname)
ext = spl[1].lower()
Expand Down Expand Up @@ -102,6 +103,24 @@ def save(data, fname, opt={}, **kwargs):
)


def loadurl(url, opt={}, **kwargs):
    """@brief Loading a JData file (binary or text) from a URL without caching locally
    @param[in] url: a REST API URL, currently only http:// and https:// are supported
    @param[in] opt: options, opt['nocache']=True by default; setting it to False downloads
                    and locally caches the data. The dict passed in is not modified.
    @param[in] kwargs: extra keyword arguments forwarded unchanged to downloadlink()
    @return the first element of the tuple returned by downloadlink()
    @exception Exception raised with args ("JData", "input to loadurl is not a valid URL")
               when url does not start with an http/https scheme
    """
    if not re.match("^https*://", url):
        raise Exception(
            "JData",
            "input to loadurl is not a valid URL",
        )

    # Work on a private copy: the default argument is a single dict shared by every
    # call, and a caller-supplied dict may be reused for load(), where a leaked
    # "nocache" entry would silently disable caching.
    opt = dict(opt) if opt else {}
    opt.setdefault("nocache", True)

    newdata = downloadlink(url, opt, **kwargs)
    return newdata[0]


##====================================================================================
## Loading and saving text-based JData (i.e. JSON) files
##====================================================================================
Expand Down Expand Up @@ -370,7 +389,6 @@ def jsoncache(url, opt={}, **kwargs):
if p is not None:
cachepath.insert(0, p)
elif dbname and docname:
print([domain, dbname, docname, cachepath])
cachepath = [os.path.join(x, domain, dbname, docname) for x in cachepath]
if filename is not None:
for i in range(len(cachepath)):
Expand Down Expand Up @@ -421,21 +439,32 @@ def jdlink(uripath, opt={}, **kwargs):
)
alloutput = [[] for _ in range(3)]
for i in range(len(uripath)):
newdata, fname, cachepath = downloadlink(uripath[i], opt)
newdata, fname, cachepath = downloadlink(uripath[i], opt, **kwargs)
alloutput[0].append(newdata)
alloutput[1].append(fname)
alloutput[2].append(cachepath)
if len(uripath) == 1:
alloutput = [x[0] for x in alloutput]
newdata, fname, cachepath = tuple(alloutput)
elif isinstance(uripath, str):
newdata, fname, cachepath = downloadlink(uripath, opt)
newdata, fname, cachepath = downloadlink(uripath, opt, **kwargs)
return newdata, fname


def downloadlink(uripath, opt={}):
def downloadlink(uripath, opt={}, **kwargs):
opt.setdefault("showlink", 1)

if "nocache" in opt and opt["nocache"]:
newdata = urllib.request.urlopen(uripath).read()
try:
newdata = loadts(newdata, opt, **kwargs)
except:
try:
newdata = loadbs(newdata, opt, **kwargs)
except:
pass
return newdata, uripath, None

newdata = []
cachepath, filename = jsoncache(uripath)
if isinstance(cachepath, list) and cachepath:
Expand Down
165 changes: 165 additions & 0 deletions jdata/jpath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""@package docstring
JSONPath implementation ported from the jsonpath MATLAB function in JSONLab
Copyright (c) 2019-2024 Qianqian Fang <q.fang at neu.edu>
"""

# Public names exported by this module: only the jsonpath() entry point is listed.
__all__ = [
"jsonpath",
]

##====================================================================================
## dependent libraries
##====================================================================================


import re
import json
import copy


def jsonpath(root, jpath, opt={}):
    """@brief Query a nested dict/list structure with a JSONPath expression
    @param[in] root: the data to search (typically nested dicts and lists)
    @param[in] jpath: JSONPath string, e.g. "$.key.subkey", "$.key[0]", "$..key";
                      a literal dot inside a key can be written as "\\." or placed
                      inside a bracketed key such as "['a.b']"
    @param[in] opt: options dict passed to getonelevel(); the dict passed in is not modified
    @return the matched value, root itself when the path is "$" or empty, or None as
            soon as one path segment cannot be resolved
    @exception ValueError may propagate from getonelevel() when a non-deep-scan segment
               is applied to a value that is neither a dict nor a list
    """
    # Private copy so neither the shared default dict nor the caller's dict can be
    # altered while the path is being resolved.
    opt = dict(opt) if opt else {}

    # "a[0]" -> "a.[0]" so that an index becomes a segment of its own
    jpath = re.sub(r"([^.\]])(\[[-0-9:\*]+\])", r"\1.\2", jpath)
    # strip optional quotes inside brackets and put a dot in front: "['k']" -> ".[k]"
    jpath = re.sub(r"\[[\'\"]*([^]\'\"]+)[\'\"]*\]", r".[\1]", jpath)
    # Protect escaped dots (backslash + ".") from the split below; only a literal
    # dot may follow the backslash, other backslash sequences are left untouched.
    jpath = re.sub(r"\\\.", "_0x2E_", jpath)
    # Protect dots that appear inside a bracketed key, one per pass until none remain
    dotted_key = r"(\[[\'\"]*[^]\'\"]+)\.(?=[^]\'\"]+[\'\"]*\])"
    while re.search(dotted_key, jpath):
        jpath = re.sub(dotted_key, r"\1_0x2E_", jpath)

    # Split into segments that keep their leading "." or ".." and restore the dots
    paths = [
        segment.replace("_0x2E_", ".")
        for segment in re.findall(r"(\.{0,2}[^.]+)", jpath)
    ]
    if paths and paths[0] == "$":
        paths.pop(0)

    obj = root
    for pathid, _segment in enumerate(paths):
        obj, isfound = getonelevel(obj, paths, pathid, opt)
        if not isfound:
            return None
    return obj


def _resolve_array_range(arraystr, input_data):
    """Turn the text between "[" and "]" into a 1-based inclusive (start, end) pair.

    Returns None when arraystr is neither a "start:end" range nor a single integer
    (for example "*" or a plain key name).
    """
    if ":" in arraystr:
        match = re.search(r"(?P<start>-*\d*):(?P<end>-*\d*)", arraystr)
        if not match:
            return None
        start_text = match.group("start")
        end_text = match.group("end")

        if start_text:
            start = int(start_text)
            if start < 0:
                # count from the end; +1 converts to the 1-based convention used
                # here, the same way the single-index case below does
                start = len(input_data) + start + 1
            else:
                start += 1
        else:
            start = 1

        if end_text:
            end = int(end_text)
            if end < 0:
                # a negative end leaves out the last |end| elements
                end = len(input_data) + end
            else:
                # a non-negative end is inclusive
                end += 1
        else:
            end = len(input_data)
        return start, end

    if re.match(r"^[-0-9:]+$", arraystr):
        index = int(arraystr)
        if index < 0:
            index = len(input_data) + index + 1
        else:
            index += 1
        return index, index

    return None


def getonelevel(input_data, paths, pathid, opt):
    """@brief Resolve one JSONPath segment against one node of the data
    @param[in] input_data: the node (dict, list or other value) the segment is applied to
    @param[in] paths: list of path segments as built by jsonpath(); an element may also be
                      a one-item list wrapping the segment string (used by the recursion)
    @param[in] pathid: index in paths of the segment to resolve
    @param[in] opt: options dict; opt['inplace'] defaults to False. The dict is not modified.
    @return tuple (obj, isfound): the matched value and True, or ([], False) when nothing
            matched. A single-element list of matches is unwrapped to its only element.
    @exception ValueError when a segment without the ".." prefix is applied to a value
               that is neither a dict nor a list/tuple/frozenset
    """
    inplace = opt.get("inplace", False)

    pathname = paths[pathid]
    if isinstance(pathname, list):
        pathname = pathname[0]
    deepscan = pathname.startswith("..")
    origpath = pathname
    pathname = pathname.lstrip(".")
    obj = None

    if pathname == "$":
        obj = input_data
    elif re.fullmatch(r"\$\d+", pathname):
        # "$N": take element N (0-based) of the current node; the digits start
        # right after the "$" sign
        obj = input_data[int(pathname[1:])]
    elif re.match(r"^\[[\-0-9\*:]+\]$", pathname) or isinstance(
        input_data, (list, tuple, frozenset)
    ):
        bounds = _resolve_array_range(pathname[1:-1], input_data)
        if bounds is not None:
            start, end = bounds
            obj = input_data[start - 1 : end]
        else:
            # wildcard or key name: consider every element
            start, end = 1, len(input_data)

        if not obj and isinstance(input_data, list):
            # Nothing selected by position: apply the same segment to each element
            searchkey = ".." + pathname if deepscan else origpath
            collected = []
            for item in input_data[start - 1 : end]:
                val, found = getonelevel(
                    item, paths[:pathid] + [searchkey], pathid, opt
                )
                if found:
                    # list results are merged, anything else is kept as one match
                    # (extend() alone fails on numbers and splits strings)
                    if isinstance(val, list):
                        collected.extend(val)
                    else:
                        collected.append(val)
            if collected:
                obj = collected
        if isinstance(obj, list) and len(obj) == 1:
            obj = obj[0]

    elif isinstance(input_data, dict):
        pathname = re.sub(r"^\[(.*)\]$", r"\1", pathname)

        if pathname in input_data:
            obj = [input_data[pathname]]

        # A direct key match wins; the children are searched only when the key is
        # absent at this level.
        if obj is None:
            for key in input_data:
                val, found = getonelevel(
                    input_data[key], paths[:pathid] + [[".." + pathname]], pathid, opt
                )
                if found:
                    obj = obj or []
                    if isinstance(val, list):
                        obj.extend(val)
                    else:
                        obj.append(val)

            if obj and len(obj) == 1:
                obj = obj[0]

        if isinstance(obj, list) and len(obj) == 1:
            obj = obj[0]

    elif not deepscan:
        raise ValueError(
            f'json path segment "{pathname}" can not be found in the input_data object'
        )

    isfound = obj is not None
    if not isfound:
        obj = []

    # NOTE(review): a deep copy is returned when 'inplace' is truthy and the original
    # object otherwise, which reads inverted relative to the option name - confirm intent.
    return (copy.deepcopy(obj), isfound) if inplace else (obj, isfound)

0 comments on commit 2647fc9

Please sign in to comment.