204 added Text to sql, bug fixes
adp-atea committed Nov 10, 2023
1 parent db1bd6a commit 99b9a3c
Showing 6 changed files with 70 additions and 60 deletions.
2 changes: 1 addition & 1 deletion spinta/components.py
@@ -705,7 +705,7 @@ class Property(Node):
'lang': {'type': 'object'},
'comments': {},
'given_name': {'type': 'string'},
'explicitly_given': {'type': 'boolean', 'default': True},
'explicitly_given': {'type': 'boolean'},
'prepare_given': {'required': False},
}

26 changes: 19 additions & 7 deletions spinta/datasets/backends/sql/commands/query.py
@@ -952,13 +952,13 @@ def sort(env: SqlQueryBuilder, expr: Expr):
args, kwargs = expr.resolve(env)
env.sort = []
for key in args:
prop = env.model.properties[key.name]
column = env.backend.get_column(env.table, prop)
if isinstance(key, Negative):
column = column.desc()
else:
column = column.asc()
env.sort.append(column)
result = env.call('sort', key)
env.sort.append(result)


@ufunc.resolver(SqlQueryBuilder, DataType)
def sort(env, dtype):
return env.call('asc', dtype)


@ufunc.resolver(SqlQueryBuilder, Bind)
@@ -992,6 +992,18 @@ def _get_from_flatprops(model: Model, prop: str):
raise exceptions.FieldNotInResource(model, property=prop)


@ufunc.resolver(SqlQueryBuilder, DataType)
def negative(env: SqlQueryBuilder, dtype: DataType):
return Negative(dtype.prop.place)


@ufunc.resolver(SqlQueryBuilder, String)
def negative(env: SqlQueryBuilder, dtype: String):
if dtype.prop.parent and isinstance(dtype.prop.parent.dtype, Text):
return Negative(dtype.prop.place.replace('.', '@'))
return Negative(dtype.prop.place)


@ufunc.resolver(SqlQueryBuilder, int)
def limit(env: SqlQueryBuilder, n: int):
env.limit = n
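For readers following the Text-to-SQL sort path: a minimal, standalone sketch (plain Python, not Spinta's actual resolver machinery) of what the two new `negative` resolvers do. For a language sub-property of a `text` property the dotted place (`name.lt`) is rewritten to the `@` notation (`name@lt`) before being wrapped in `Negative`; every other property keeps its place unchanged. `Negative` below is only a stand-in for Spinta's marker class, and `negative_place` is a hypothetical helper used for illustration.

    from dataclasses import dataclass

    @dataclass
    class Negative:
        # Stand-in for spinta's Negative marker used by the sort machinery.
        name: str

    def negative_place(place: str, parent_is_text: bool) -> Negative:
        # Language sub-properties of a text property ('name.lt') are rewritten
        # to the '@' form ('name@lt'); all other properties keep their place.
        if parent_is_text:
            return Negative(place.replace('.', '@'))
        return Negative(place)

    assert negative_place('name.lt', True).name == 'name@lt'
    assert negative_place('code', False).name == 'code'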
46 changes: 30 additions & 16 deletions spinta/manifests/tabular/helpers.py
@@ -777,7 +777,7 @@ def _string_datatype_handler(reader: PropertyReader, row: dict):
if lang and lang in existing_data['langs']:
raise Exception("LANG ALREADY DEFINED")
elif not lang:
existing_data['langs'][''] = new_data
existing_data['langs']['C'] = new_data
else:
existing_data['langs'][lang] = new_data
else:
@@ -808,7 +808,7 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
f"Now it is defined in {context.name!r} {context.type} context."
)
result = _check_if_property_already_set(reader, row, given_name)
if not (result and result['explicitly_given'] is False and result['type'] == 'text'):
if not (result and result['explicitly_given'] is False and result['type'] == 'text' or not result):
raise Exception("ALREADY TEXT SET")
dtype = _get_type_repr(row['type'])
dtype = _parse_dtype_string(dtype)
@@ -834,20 +834,27 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
new_data['external'] = {
'name': row['source'],
}
temp_data = new_data.copy()
temp_data['explicitly_given'] = False
temp_data.pop('langs')
temp_data = _empty_property(_initial_normal_property_schema(given_name, dtype, {
'property': row['property'],
'access': row['access'],
}))
temp_data['type'] = 'string'
temp_data['external'] = new_data['external']
if result:
new_data['langs'] = result['langs']
if new_data['level'] and int(new_data['level']) <= 3:
new_data['langs']['C'] = temp_data
if new_data['external']:
new_data['external'] = {}
result.update(new_data)
if result['level'] and int(result['level']) <= 3:
result['langs']['C'] = temp_data
return result
else:
if result['level'] and int(new_data['level']) <= 3:
if new_data['level'] and int(new_data['level']) <= 3:
new_data['langs'] = {
'C': temp_data
}
if new_data['external']:
new_data['external'] = {}
return new_data
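
For orientation, a rough sketch of the schema dict this handler now produces for a `text` property at level 3 or lower: the source moves into an implicit 'C' (default/undetermined language) string entry that is marked as not explicitly given, and the parent's own `external` is cleared. Field names are the ones visible in the diff; other keys are omitted, and the source column name is only an example.

    # Illustrative shape only, assuming source column 'pavadinimas':
    text_property = {
        'type': 'text',
        'external': {},                     # cleared on the parent when level <= 3
        'langs': {
            'C': {                          # undetermined language (was '' before this commit)
                'type': 'string',
                'explicitly_given': False,  # set via _empty_property()
                'external': {'name': 'pavadinimas'},
            },
        },
    }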


@@ -937,6 +944,11 @@ def _combine_parent_with_prop(prop_name: str, prop: dict, parent_prop: dict, ful
return return_name


def _empty_property(data: dict):
data['explicitly_given'] = False
return data


def _get_parent_data_array(reader: PropertyReader, given_row: dict, full_name: str, current_parent: dict):
name = full_name.split('.')[-1]
count = name.count('[]')
@@ -947,20 +959,20 @@ def _get_parent_data_array(reader: PropertyReader, given_row: dict, full_name: s
'access': given_row['access'],
})
if not current_parent:
current_parent.update(_array_datatype_handler(reader, empty_array_row))
current_parent.update(_empty_property(_array_datatype_handler(reader, empty_array_row)))
adjustment = 1 if current_parent['type'] in ALLOWED_ARRAY_TYPES else 0
for i in range(count - adjustment):
if current_parent['type'] in ALLOWED_ARRAY_TYPES:
if current_parent['items'] and current_parent['items']['type'] not in ALLOWED_ARRAY_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='array')
elif not current_parent['items']:
current_parent['items'].update(_handle_datatype(reader, empty_array_row))
current_parent['items'].update(_empty_property(_array_datatype_handler(reader, empty_array_row)))
current_parent = current_parent['items']
elif current_parent['type'] in ALLOWED_PARTIAL_TYPES:
if root_name in current_parent['properties'] and current_parent['properties'][root_name]['type'] not in ALLOWED_ARRAY_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='array')
elif root_name not in current_parent['properties']:
current_parent['properties'][root_name] = _handle_datatype(reader, empty_array_row)
current_parent['properties'][root_name] = _empty_property(_array_datatype_handler(reader, empty_array_row))
current_parent = current_parent['properties'][root_name]
return current_parent

Expand All @@ -973,19 +985,19 @@ def _get_parent_data_partial(reader: PropertyReader, given_row: dict, full_name:
})
name = _clean_up_prop_name(full_name.split('.')[-1])
if not current_parent:
current_parent.update(_handle_datatype(reader, empty_partial_row))
current_parent.update(_empty_property(_partial_datatype_handler(reader, empty_partial_row)))
else:
if current_parent['type'] in ALLOWED_ARRAY_TYPES:
if current_parent['items'] and current_parent['items']['type'] not in ALLOWED_PARTIAL_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='partial')
elif not current_parent['items']:
current_parent['items'].update(_handle_datatype(reader, empty_partial_row))
current_parent['items'].update(_empty_property(_partial_datatype_handler(reader, empty_partial_row)))
current_parent = current_parent['items']
elif current_parent['type'] in ALLOWED_PARTIAL_TYPES:
if name in current_parent['properties'] and current_parent['properties'][name]['type'] not in ALLOWED_PARTIAL_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='partial')
elif name not in current_parent['properties']:
current_parent['properties'][name] = _handle_datatype(reader, empty_partial_row)
current_parent['properties'][name] = _empty_property(_partial_datatype_handler(reader, empty_partial_row))
current_parent = current_parent['properties'][name]
return current_parent

@@ -2174,16 +2186,18 @@ def _property_to_tabular(
elif isinstance(prop.dtype, Object):
for obj_prop in prop.dtype.properties.values():
yield_rows.append(obj_prop)
elif isinstance(prop.dtype, Text):
for lang_prop in prop.dtype.langs.values():
yield_rows.append(lang_prop)

elif prop.enum is not None:
data['ref'] = prop.given.enum
elif prop.unit is not None:
data['ref'] = prop.given.unit
data, prepare_rows = _prepare_to_tabular(data, prop)
if prop.given.name:
if prop.given.explicit:
yield torow(DATASET, data)
yield from prepare_rows
#yield from _text_to_tabular(prop)
yield from _comments_to_tabular(prop.comments, access=access)
yield from _lang_to_tabular(prop.lang)
yield from _enums_to_tabular(
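The `_empty_property` helper and the new `prop.given.explicit` check in `_property_to_tabular` work together: parents and language entries synthesized by the loader (rather than written in the manifest) are flagged `explicitly_given = False`, so only rows the user actually wrote are rendered back to tabular form, while `Text` language sub-properties are yielded separately. A small sketch of the flag's effect, using plain dicts in place of loaded `Property` objects:

    # Sketch: only explicitly given properties are rendered back to DSA rows.
    def _empty_property(data: dict) -> dict:    # same helper as in the diff
        data['explicitly_given'] = False
        return data

    explicit = {'type': 'string', 'explicitly_given': True}
    implicit = _empty_property({'type': 'array', 'items': {}})

    rows = [p for p in (explicit, implicit) if p.get('explicitly_given', True)]
    assert rows == [explicit]                   # the synthesized parent is skipped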
13 changes: 0 additions & 13 deletions spinta/nodes.py
@@ -5,23 +5,17 @@
from typing import Tuple
from typing import Type
from typing import Union
from typing import TypeVar
from typing import Iterator
from typing import NamedTuple
from typing import overload
from collections import defaultdict
from spinta import commands
from spinta import exceptions
from spinta.components import Component
from spinta.components import Config
from spinta.components import Context
from spinta.components import EntryId
from spinta.components import Node
from spinta.components import Model
from spinta.manifests.components import Manifest
from spinta.utils.schema import NA
from spinta.utils.schema import resolve_schema
from spinta.types.datatype import Ref


def get_node(
@@ -233,10 +227,3 @@ def load_model_properties(
prop = commands.load(context, prop, params, model.manifest)
model.properties[name] = prop
model.flatprops[name] = prop


class _SplitProp(NamedTuple):
type: str # text, object, array
name: str # base part of the name
tail: str # tail part of the name
data: str # property data dict
4 changes: 1 addition & 3 deletions spinta/types/model.py
@@ -259,8 +259,6 @@ def load(
prop, data = load_node(context, prop, data, mixed=True)
prop = cast(Property, prop)

if 'prepare_given' in data and data['prepare_given']:
prop.given.prepare = data['prepare_given']
parents = list(itertools.chain(
[prop.model, prop.model.ns],
prop.model.ns.parents(),
@@ -313,7 +311,7 @@ def load(
else:
prop.given.enum = unit
prop.given.name = prop.given_name if prop.given_name else prop.name
prop.given.explicit = prop.explicitly_given if prop.explicitly_given else True
prop.given.explicit = prop.explicitly_given if prop.explicitly_given is not None else True
return prop


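The small fix in `load()` matters because `explicitly_given` can now legitimately be `False` (for properties generated via `_empty_property`): the old truthiness check coerced that `False` back to `True`, while the new `is not None` check only applies the default when the value is genuinely unset. A plain-Python illustration of the two ternaries:

    explicitly_given = False   # e.g. a property synthesized by the loader

    old = explicitly_given if explicitly_given else True              # True  -- wrong
    new = explicitly_given if explicitly_given is not None else True  # False -- preserved

    assert old is True and new is False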
39 changes: 19 additions & 20 deletions tests/datasets/test_sql.py
@@ -2459,12 +2459,12 @@ def test_cast_string(
def test_type_text_push(postgresql, rc, cli: SpintaCliRunner, responses, tmpdir, geodb, request):
create_tabular_manifest(tmpdir / 'manifest.csv', striptable('''
d | r | b | m | property| type | ref | source | access
datasets/gov/example | | | |
datasets/gov/example/text_push | | | |
| data | sql | | |
| | | | | |
| | | Country | | code | salis |
| | | | code | string | | kodas | open
| | | | name@lt | text | | pavadinimas | open
| | | | name@lt | string | | pavadinimas | open
| | | | | |
| | | City | | name | miestas |
| | | | name | string | | pavadinimas | open
@@ -2482,19 +2482,18 @@ def test_type_text_push(postgresql, rc, cli: SpintaCliRunner, responses, tmpdir,
assert remote.url == 'https://example.com/'
result = cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_push',
'-o', remote.url,
'--credentials', remote.credsfile,
'--dry-run',
])
assert result.exit_code == 0

remote.app.authmodel('datasets/gov/example/Country', ['getall'])
resp = remote.app.get('/datasets/gov/example/Country')
remote.app.authmodel('datasets/gov/example/text_push/Country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text_push/Country')
assert listdata(resp, 'code', 'name') == []


@pytest.mark.skip('todo')
def test_text_type_push_chunks(
postgresql,
rc,
@@ -2506,13 +2505,13 @@ def test_text_type_push_chunks(
):
create_tabular_manifest(tmp_path / 'manifest.csv', striptable('''
d | r | b | m | property | source | type | ref | access
datasets/gov/example | | | |
datasets/gov/example/text_chunks | | | |
| data | | sql | |
| | | | | |
| | | country | salis | | code |
| | | | code | kodas | string | | open
| | | | name@lt | pavadinimas | text | | open
| | | | name@en | pavadinimas | text | | open
| | | | name@lt | pavadinimas | string | | open
| | | | name@en | pavadinimas | string | | open
'''))

# Configure local server with SQL backend
@@ -2525,22 +2524,22 @@ def test_text_type_push_chunks(
# Push data from local to remote.
cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_chunks',
'-o', 'spinta+' + remote.url,
'--credentials', remote.credsfile,
'--chunk-size=1',
])

cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_chunks',
'-o', 'spinta+' + remote.url,
'--credentials', remote.credsfile,
'--chunk-size=1',
])

remote.app.authmodel('datasets/gov/example/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/country')
remote.app.authmodel('datasets/gov/example/text_chunks/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text_chunks/country')
assert listdata(resp, 'code', 'name') == [
('ee', 'Estija'),
('lt', 'Lietuva'),
@@ -2551,12 +2550,12 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, tmp_path, geodb, request):
create_tabular_manifest(tmp_path / 'manifest.csv', striptable('''
d | r | b | m | property | source | type | ref | access
datasets/gov/example | | | |
datasets/gov/example/text | | | |
| data | | sql | |
| | | | | |
| | | country | salis | | code |
| | | | code | kodas | string | | open
| | | | name@lt | pavadinimas | text | | open
| | | | name@lt | pavadinimas | string | | open
'''))

# Configure local server with SQL backend
@@ -2569,7 +2568,7 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
# Push one row, save state and stop.
cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text',
'-o', remote.url,
'--credentials', remote.credsfile,
'--chunk-size', '1k',
@@ -2578,20 +2577,20 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
'--state', tmp_path / 'state.db',
])

remote.app.authmodel('datasets/gov/example/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/country')
remote.app.authmodel('/datasets/gov/example/text/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text/country')
assert len(listdata(resp)) == 1

cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text',
'-o', remote.url,
'--credentials', remote.credsfile,
'--stop-row', '1',
'--state', tmp_path / 'state.db',
])

resp = remote.app.get('/datasets/gov/example/country')
resp = remote.app.get('/datasets/gov/example/text/country')
assert len(listdata(resp)) == 2


