Merge pull request #1051 from guzman-raphael/json
Add `json` data type
dimitri-yatsenko authored Feb 10, 2023
2 parents f28a3b9 + 477d270 commit baf445a
Showing 14 changed files with 1,411 additions and 63 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -1,6 +1,7 @@
## Release notes

### 0.14.0 -- TBA
### 0.14.0 -- Feb 10, 2023
- Added - `json` data type ([#245](https://github.com/datajoint/datajoint-python/issues/245)) PR [#1051](https://github.com/datajoint/datajoint-python/pull/1051)
- Fixed - Activating a schema requires all tables to exist even if `create_tables=False` PR [#1058](https://github.com/datajoint/datajoint-python/pull/1058)
- Changed - Populate call with `reserve_jobs=True` to exclude `error` and `ignore` keys - PR [#1062](https://github.com/datajoint/datajoint-python/pull/1062)
- Added - Support for inserting data with CSV files - PR [#1067](https://github.com/datajoint/datajoint-python/pull/1067)
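For context on the headline change, here is a minimal sketch of declaring the new `json` type in a table definition (illustrative only; the schema and table names are assumptions, not part of this diff):

```python
import datajoint as dj

schema = dj.Schema("tutorial")  # hypothetical schema name

@schema
class Team(dj.Manual):
    definition = """
    name: varchar(40)
    ---
    car=null: json  # secondary attributes may now be declared as json
    """
```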
103 changes: 71 additions & 32 deletions datajoint/condition.py
@@ -8,8 +8,29 @@
import decimal
import numpy
import pandas
import json
from .errors import DataJointError

JSON_PATTERN = re.compile(
r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$"
)


def translate_attribute(key):
match = JSON_PATTERN.match(key)
if match is None:
return match, key
match = match.groupdict()
if match["path"] is None:
return match, match["attr"]
else:
return match, "json_value(`{}`, _utf8mb4'$.{}'{})".format(
*[
((f" returning {v}" if k == "type" else v) if v else "")
for k, v in match.items()
]
)


class PromiscuousOperand:
"""
@@ -94,35 +115,56 @@ def make_condition(query_expression, condition, columns):
from .expression import QueryExpression, Aggregation, U

def prep_value(k, v):
"""prepare value v for inclusion as a string in an SQL condition"""
if query_expression.heading[k].uuid:
"""prepare SQL condition"""
key_match, k = translate_attribute(k)
if key_match["path"] is None:
k = f"`{k}`"
if (
query_expression.heading[key_match["attr"]].json
and key_match["path"] is not None
and isinstance(v, dict)
):
return f"{k}='{json.dumps(v)}'"
if v is None:
return f"{k} IS NULL"
if query_expression.heading[key_match["attr"]].uuid:
if not isinstance(v, uuid.UUID):
try:
v = uuid.UUID(v)
except (AttributeError, ValueError):
raise DataJointError(
"Badly formed UUID {v} in restriction by `{k}`".format(k=k, v=v)
)
return "X'%s'" % v.bytes.hex()
return f"{k}=X'{v.bytes.hex()}'"
if isinstance(
v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)
v,
(
datetime.date,
datetime.datetime,
datetime.time,
decimal.Decimal,
list,
),
):
return '"%s"' % v
return f'{k}="{v}"'
if isinstance(v, str):
return '"%s"' % v.replace("%", "%%").replace("\\", "\\\\")
return "%r" % v
v = v.replace("%", "%%").replace("\\", "\\\\")
return f'{k}="{v}"'
return f"{k}={v}"

def combine_conditions(negate, conditions):
return f"{'NOT ' if negate else ''} ({')AND('.join(conditions)})"

negate = False
while isinstance(condition, Not):
negate = not negate
condition = condition.restriction
template = "NOT (%s)" if negate else "%s"

# restrict by string
if isinstance(condition, str):
columns.update(extract_column_names(condition))
return template % condition.strip().replace(
"%", "%%"
return combine_conditions(
negate, conditions=[condition.strip().replace("%", "%%")]
) # escape %, see issue #376

# restrict by AndList
@@ -139,7 +181,7 @@ def prep_value(k, v):
return negate # if any item is False, the whole thing is False
if not items:
return not negate # an empty AndList is True
return template % ("(" + ") AND (".join(items) + ")")
return combine_conditions(negate, conditions=items)

# restriction by dj.U evaluates to True
if isinstance(condition, U):
@@ -151,23 +193,19 @@

# restrict by a mapping/dict -- convert to an AndList of string equality conditions
if isinstance(condition, collections.abc.Mapping):
common_attributes = set(condition).intersection(query_expression.heading.names)
common_attributes = set(c.split(".", 1)[0] for c in condition).intersection(
query_expression.heading.names
)
if not common_attributes:
return not negate # no matching attributes -> evaluates to True
columns.update(common_attributes)
return template % (
"("
+ ") AND (".join(
"`%s`%s"
% (
k,
" IS NULL"
if condition[k] is None
else f"={prep_value(k, condition[k])}",
)
for k in common_attributes
)
+ ")"
return combine_conditions(
negate,
conditions=[
prep_value(k, v)
for k, v in condition.items()
if k.split(".", 1)[0] in common_attributes # handle json indexing
],
)

# restrict by a numpy record -- convert to an AndList of string equality conditions
@@ -178,12 +216,9 @@ def prep_value(k, v):
if not common_attributes:
return not negate # no matching attributes -> evaluate to True
columns.update(common_attributes)
return template % (
"("
+ ") AND (".join(
"`%s`=%s" % (k, prep_value(k, condition[k])) for k in common_attributes
)
+ ")"
return combine_conditions(
negate,
conditions=[prep_value(k, condition[k]) for k in common_attributes],
)

# restrict by a QueryExpression subclass -- trigger instantiation and move on
Expand Down Expand Up @@ -231,7 +266,11 @@ def prep_value(k, v):
] # ignore False conditions
if any(item is True for item in or_list): # if any item is True, entirely True
return not negate
return template % ("(%s)" % " OR ".join(or_list)) if or_list else negate
return (
f"{'NOT ' if negate else ''} ({' OR '.join(or_list)})"
if or_list
else negate
)


def extract_column_names(sql_expression):
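Taken together, the changes to `condition.py` let restrictions address values nested inside a json attribute. A sketch of the resulting behavior, reusing the hypothetical `Team` table from above:

```python
# Dict restrictions may use dotted paths into a json attribute;
# translate_attribute() compiles them into MySQL json_value() calls.
Team & {"car.name": "Chaching"}

# An optional ":type" suffix becomes json_value's RETURNING clause, e.g.
#   json_value(`car`, _utf8mb4'$.length' returning decimal(4,1))
Team & {"car.length:decimal(4,1)": 20.5}

# Restricting a path to a dict compares against its JSON serialization
Team & {"car.headlights[0]": {"side": "left", "hyper_white": None}}
```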
38 changes: 18 additions & 20 deletions datajoint/declare.py
@@ -7,6 +7,7 @@
import logging
from .errors import DataJointError, _support_filepath_types, FILEPATH_FEATURE_SWITCH
from .attribute_adapter import get_adapter
from .condition import translate_attribute

UUID_DATA_TYPE = "binary(16)"
MAX_TABLE_NAME_LENGTH = 64
@@ -23,6 +24,7 @@
DECIMAL=r"(decimal|numeric)(\s*\(.+\))?(\s+unsigned)?$",
FLOAT=r"(double|float|real)(\s*\(.+\))?(\s+unsigned)?$",
STRING=r"(var)?char\s*\(.+\)$",
JSON=r"json$",
ENUM=r"enum\s*\(.+\)$",
BOOL=r"bool(ean)?$", # aliased to tinyint(1)
TEMPORAL=r"(date|datetime|time|timestamp|year)(\s*\(.+\))?$",
@@ -129,25 +131,9 @@ def build_attribute_parser():
return attribute_name + pp.Optional(default) + colon + data_type + comment


def build_index_parser():
left = pp.Literal("(").suppress()
right = pp.Literal(")").suppress()
unique = pp.Optional(pp.CaselessKeyword("unique")).setResultsName("unique")
index = pp.CaselessKeyword("index").suppress()
attribute_name = pp.Word(pp.srange("[a-z]"), pp.srange("[a-z0-9_]"))
return (
unique
+ index
+ left
+ pp.delimitedList(attribute_name).setResultsName("attr_list")
+ right
)


foreign_key_parser_old = build_foreign_key_parser_old()
foreign_key_parser = build_foreign_key_parser()
attribute_parser = build_attribute_parser()
index_parser = build_index_parser()


def is_foreign_key(line):
@@ -275,7 +261,7 @@ def prepare_declare(definition, context):
foreign_key_sql,
index_sql,
)
elif re.match(r"^(unique\s+)?index[^:]*$", line, re.I): # index
elif re.match(r"^(unique\s+)?index\s*.*$", line, re.I): # index
compile_index(line, index_sql)
else:
name, sql, store = compile_attribute(line, in_key, foreign_key_sql, context)
@@ -449,10 +435,22 @@ def alter(definition, old_definition, context):


def compile_index(line, index_sql):
match = index_parser.parseString(line)
def format_attribute(attr):
match, attr = translate_attribute(attr)
if match is None:
return attr
if match["path"] is None:
return f"`{attr}`"
return f"({attr})"

match = re.match(
r"(?P<unique>unique\s+)?index\s*\(\s*(?P<args>.*)\)", line, re.I
).groupdict()
attr_list = re.findall(r"(?:[^,(]|\([^)]*\))+", match["args"])
index_sql.append(
"{unique} index ({attrs})".format(
unique=match.unique, attrs=",".join("`%s`" % a for a in match.attr_list)
"{unique}index ({attrs})".format(
unique="unique " if match["unique"] else "",
attrs=",".join(format_attribute(a.strip()) for a in attr_list),
)
)

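With `compile_index()` generalized, a table definition can declare a functional index over a json path. A hedged sketch (the attribute and path are assumptions):

```python
definition = """
name: varchar(40)
---
car=null: json
unique index(car.name:char(50))
"""
# compile_index() turns the index line into:
#   unique index ((json_value(`car`, _utf8mb4'$.name' returning char(50))))
```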
4 changes: 4 additions & 0 deletions datajoint/expression.py
@@ -14,6 +14,7 @@
assert_join_compatibility,
extract_column_names,
PromiscuousOperand,
translate_attribute,
)
from .declare import CONSTANT_LITERALS

@@ -342,6 +343,9 @@ def proj(self, *attributes, **named_attributes):
from other attributes available before the projection.
Each attribute name can only be used once.
"""
named_attributes = {
k: translate_attribute(v)[1] for k, v in named_attributes.items()
}
# new attributes in parentheses are included again with the new name without removing original
duplication_pattern = re.compile(
rf'^\s*\(\s*(?!{"|".join(CONSTANT_LITERALS)})(?P<name>[a-zA-Z_]\w*)\s*\)\s*$'
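Because `proj()` now passes named attributes through `translate_attribute()`, projections can also reach into json values. A sketch under the same assumed table:

```python
# Pull nested json fields out as ordinary attributes of the result
Team.proj(car_name="car.name", car_length="car.length:decimal(4,1)")
```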
4 changes: 3 additions & 1 deletion datajoint/fetch.py
@@ -4,6 +4,7 @@
import pandas
import itertools
import re
import json
import numpy as np
import uuid
import numbers
@@ -47,6 +48,8 @@ def _get(connection, attr, data, squeeze, download_path):
"""
if data is None:
return
if attr.json:
return json.loads(data)

extern = (
connection.schemas[attr.database].external[attr.store]
@@ -59,7 +62,6 @@

if attr.is_filepath:
return adapt(extern.download_filepath(uuid.UUID(bytes=data))[0])

if attr.is_attachment:
# Steps:
# 1. get the attachment filename
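On the way out, json columns are decoded with `json.loads`, so fetched values arrive as native Python objects. A sketch (the restriction key is hypothetical):

```python
rec = (Team & {"name": "engineering"}).fetch1()
# rec["car"] is whatever json.loads() yields for the stored document:
# a dict, list, str, or number -- or None for SQL NULL
assert rec["car"] is None or isinstance(rec["car"], (dict, list, str, int, float))
```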
16 changes: 12 additions & 4 deletions datajoint/heading.py
@@ -28,6 +28,7 @@
numeric=None,
string=None,
uuid=False,
json=None,
is_blob=False,
is_attachment=False,
is_filepath=False,
@@ -142,7 +143,7 @@ def non_blobs(self):
return [
k
for k, v in self.attributes.items()
if not v.is_blob and not v.is_attachment and not v.is_filepath
if not (v.is_blob or v.is_attachment or v.is_filepath or v.json)
]

@property
@@ -290,6 +291,7 @@ def _init_from_database(self):
),
is_blob=bool(TYPE_PATTERN["INTERNAL_BLOB"].match(attr["type"])),
uuid=False,
json=bool(TYPE_PATTERN["JSON"].match(attr["type"])),
is_attachment=False,
is_filepath=False,
adapter=None,
@@ -375,10 +377,15 @@ def _init_from_database(self):
)

if attr["in_key"] and any(
(attr["is_blob"], attr["is_attachment"], attr["is_filepath"])
(
attr["is_blob"],
attr["is_attachment"],
attr["is_filepath"],
attr["json"],
)
):
raise DataJointError(
"Blob, attachment, or filepath attributes are not allowed in the primary key"
"Json, Blob, attachment, or filepath attributes are not allowed in the primary key"
)

if (
Expand Down Expand Up @@ -419,7 +426,8 @@ def _init_from_database(self):
):
if item["Key_name"] != "PRIMARY":
keys[item["Key_name"]][item["Seq_in_index"]] = dict(
column=item["Column_name"],
column=item["Column_name"]
or f"({item['Expression']})".replace(r"\'", "'"),
unique=(item["Non_unique"] == 0),
nullable=item["Null"].lower() == "yes",
)
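Note the new guard: json attributes, like blobs, attachments, and filepaths, are rejected in primary keys. A sketch of a declaration that now fails with DataJointError (names are hypothetical):

```python
@schema
class BadKey(dj.Manual):
    definition = """
    meta: json   # json above the --- divider is not allowed
    ---
    value: int
    """
```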
3 changes: 3 additions & 0 deletions datajoint/table.py
@@ -8,6 +8,7 @@
import uuid
import csv
import re
import json
from pathlib import Path
from .settings import config
from .declare import declare, alter
@@ -831,6 +832,8 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False):
value = self.external[attr.store].upload_filepath(value).bytes
elif attr.numeric:
value = str(int(value) if isinstance(value, bool) else value)
elif attr.json:
value = json.dumps(value)
return name, placeholder, value

def __make_row_to_insert(self, row, field_list, ignore_extra_fields):
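On insert, values destined for a json column are serialized with `json.dumps`, so plain dicts and lists can be passed directly. A sketch (row contents assumed):

```python
Team.insert1({
    "name": "engineering",
    "car": {"name": "Chaching", "length": 20.5, "inspected": True},
})
Team.insert1({"name": "marketing", "car": None})  # stores SQL NULL
```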
1 change: 1 addition & 0 deletions docs/.docker/pip_requirements.txt
@@ -7,3 +7,4 @@ mdx-truly-sane-lists
mkdocs-gen-files
mkdocs-literate-nav
mkdocs-exclude-search
mkdocs-jupyter
5 changes: 4 additions & 1 deletion docs/mkdocs.yaml
@@ -15,7 +15,8 @@ nav:
- Reproducibility:
- Table Tiers: reproduce/table-tiers.md
- Make Method: reproduce/make-method.md
- Tutorials: tutorials.md
- Tutorials:
- tutorials/json.ipynb
- Develop: develop.md
- Changelog: about/changelog.md
- API: api/ # defer to gen-files + literate-nav
@@ -72,6 +73,8 @@ plugins:
exclude:
- "*/navigation.md"
- "*/archive/*md"
- mkdocs-jupyter:
include: ["*.ipynb"]
markdown_extensions:
- attr_list
- toc:
Empty file removed docs/src/concepts.md
3 changes: 0 additions & 3 deletions docs/src/tutorials.md

This file was deleted.
