204 added Text to sql, bug fixes
adp-atea committed Nov 10, 2023
1 parent db1bd6a commit 99b9a3c
Showing 6 changed files with 70 additions and 60 deletions.
2 changes: 1 addition & 1 deletion spinta/components.py
@@ -705,7 +705,7 @@ class Property(Node):
'lang': {'type': 'object'},
'comments': {},
'given_name': {'type': 'string'},
'explicitly_given': {'type': 'boolean', 'default': True},
'explicitly_given': {'type': 'boolean'},
'prepare_given': {'required': False},
}

26 changes: 19 additions & 7 deletions spinta/datasets/backends/sql/commands/query.py
@@ -952,13 +952,13 @@ def sort(env: SqlQueryBuilder, expr: Expr):
args, kwargs = expr.resolve(env)
env.sort = []
for key in args:
prop = env.model.properties[key.name]
column = env.backend.get_column(env.table, prop)
if isinstance(key, Negative):
column = column.desc()
else:
column = column.asc()
env.sort.append(column)
result = env.call('sort', key)
env.sort.append(result)


@ufunc.resolver(SqlQueryBuilder, DataType)
def sort(env, dtype):
return env.call('asc', dtype)


@ufunc.resolver(SqlQueryBuilder, Bind)
@@ -992,6 +992,18 @@ def _get_from_flatprops(model: Model, prop: str):
raise exceptions.FieldNotInResource(model, property=prop)


@ufunc.resolver(SqlQueryBuilder, DataType)
def negative(env: SqlQueryBuilder, dtype: DataType):
return Negative(dtype.prop.place)


@ufunc.resolver(SqlQueryBuilder, String)
def negative(env: SqlQueryBuilder, dtype: String):
if dtype.prop.parent and isinstance(dtype.prop.parent.dtype, Text):
return Negative(dtype.prop.place.replace('.', '@'))
return Negative(dtype.prop.place)


@ufunc.resolver(SqlQueryBuilder, int)
def limit(env: SqlQueryBuilder, n: int):
env.limit = n
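For readers following the Text-to-SQL sort path: a minimal, standalone sketch (plain Python, not Spinta's actual resolver machinery) of what the two new `negative` resolvers do. For a language sub-property of a `text` property the dotted place (`name.lt`) is rewritten to the `@` notation (`name@lt`) before being wrapped in `Negative`; every other property keeps its place unchanged. `Negative` below is only a stand-in for Spinta's marker class, and `negative_place` is a hypothetical helper used for illustration.

    from dataclasses import dataclass

    @dataclass
    class Negative:
        # Stand-in for spinta's Negative marker used by the sort machinery.
        name: str

    def negative_place(place: str, parent_is_text: bool) -> Negative:
        # Language sub-properties of a text property ('name.lt') are rewritten
        # to the '@' form ('name@lt'); all other properties keep their place.
        if parent_is_text:
            return Negative(place.replace('.', '@'))
        return Negative(place)

    assert negative_place('name.lt', True).name == 'name@lt'
    assert negative_place('code', False).name == 'code'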
46 changes: 30 additions & 16 deletions spinta/manifests/tabular/helpers.py
@@ -777,7 +777,7 @@ def _string_datatype_handler(reader: PropertyReader, row: dict):
if lang and lang in existing_data['langs']:
raise Exception("LANG ALREADY DEFINED")
elif not lang:
existing_data['langs'][''] = new_data
existing_data['langs']['C'] = new_data
else:
existing_data['langs'][lang] = new_data
else:
@@ -808,7 +808,7 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
f"Now it is defined in {context.name!r} {context.type} context."
)
result = _check_if_property_already_set(reader, row, given_name)
if not (result and result['explicitly_given'] is False and result['type'] == 'text'):
if not (result and result['explicitly_given'] is False and result['type'] == 'text' or not result):
raise Exception("ALREADY TEXT SET")
dtype = _get_type_repr(row['type'])
dtype = _parse_dtype_string(dtype)
@@ -834,20 +834,27 @@ def _text_datatype_handler(reader: PropertyReader, row: dict):
new_data['external'] = {
'name': row['source'],
}
temp_data = new_data.copy()
temp_data['explicitly_given'] = False
temp_data.pop('langs')
temp_data = _empty_property(_initial_normal_property_schema(given_name, dtype, {
'property': row['property'],
'access': row['access'],
}))
temp_data['type'] = 'string'
temp_data['external'] = new_data['external']
if result:
new_data['langs'] = result['langs']
if new_data['level'] and int(new_data['level']) <= 3:
new_data['langs']['C'] = temp_data
if new_data['external']:
new_data['external'] = {}
result.update(new_data)
if result['level'] and int(result['level']) <= 3:
result['langs']['C'] = temp_data
return result
else:
if result['level'] and int(new_data['level']) <= 3:
if new_data['level'] and int(new_data['level']) <= 3:
new_data['langs'] = {
'C': temp_data
}
if new_data['external']:
new_data['external'] = {}
return new_data
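
For orientation, a rough sketch of the schema dict this handler now produces for a `text` property at level 3 or lower: the source moves into an implicit 'C' (default/undetermined language) string entry that is marked as not explicitly given, and the parent's own `external` is cleared. Field names are the ones visible in the diff; other keys are omitted, and the source column name is only an example.

    # Illustrative shape only, assuming source column 'pavadinimas':
    text_property = {
        'type': 'text',
        'external': {},                     # cleared on the parent when level <= 3
        'langs': {
            'C': {                          # undetermined language (was '' before this commit)
                'type': 'string',
                'explicitly_given': False,  # set via _empty_property()
                'external': {'name': 'pavadinimas'},
            },
        },
    }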


@@ -937,6 +944,11 @@ def _combine_parent_with_prop(prop_name: str, prop: dict, parent_prop: dict, ful
return return_name


def _empty_property(data: dict):
data['explicitly_given'] = False
return data


def _get_parent_data_array(reader: PropertyReader, given_row: dict, full_name: str, current_parent: dict):
name = full_name.split('.')[-1]
count = name.count('[]')
@@ -947,20 +959,20 @@ def _get_parent_data_array(reader: PropertyReader, given_row: dict, full_name: s
'access': given_row['access'],
})
if not current_parent:
current_parent.update(_array_datatype_handler(reader, empty_array_row))
current_parent.update(_empty_property(_array_datatype_handler(reader, empty_array_row)))
adjustment = 1 if current_parent['type'] in ALLOWED_ARRAY_TYPES else 0
for i in range(count - adjustment):
if current_parent['type'] in ALLOWED_ARRAY_TYPES:
if current_parent['items'] and current_parent['items']['type'] not in ALLOWED_ARRAY_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='array')
elif not current_parent['items']:
current_parent['items'].update(_handle_datatype(reader, empty_array_row))
current_parent['items'].update(_empty_property(_array_datatype_handler(reader, empty_array_row)))
current_parent = current_parent['items']
elif current_parent['type'] in ALLOWED_PARTIAL_TYPES:
if root_name in current_parent['properties'] and current_parent['properties'][root_name]['type'] not in ALLOWED_ARRAY_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='array')
elif root_name not in current_parent['properties']:
current_parent['properties'][root_name] = _handle_datatype(reader, empty_array_row)
current_parent['properties'][root_name] = _empty_property(_array_datatype_handler(reader, empty_array_row))
current_parent = current_parent['properties'][root_name]
return current_parent

Expand All @@ -973,19 +985,19 @@ def _get_parent_data_partial(reader: PropertyReader, given_row: dict, full_name:
})
name = _clean_up_prop_name(full_name.split('.')[-1])
if not current_parent:
current_parent.update(_handle_datatype(reader, empty_partial_row))
current_parent.update(_empty_property(_partial_datatype_handler(reader, empty_partial_row)))
else:
if current_parent['type'] in ALLOWED_ARRAY_TYPES:
if current_parent['items'] and current_parent['items']['type'] not in ALLOWED_PARTIAL_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='partial')
elif not current_parent['items']:
current_parent['items'].update(_handle_datatype(reader, empty_partial_row))
current_parent['items'].update(_empty_property(_partial_datatype_handler(reader, empty_partial_row)))
current_parent = current_parent['items']
elif current_parent['type'] in ALLOWED_PARTIAL_TYPES:
if name in current_parent['properties'] and current_parent['properties'][name]['type'] not in ALLOWED_PARTIAL_TYPES:
raise NestedDataTypeMissmatch(initial=current_parent['type'], required='partial')
elif name not in current_parent['properties']:
current_parent['properties'][name] = _handle_datatype(reader, empty_partial_row)
current_parent['properties'][name] = _empty_property(_partial_datatype_handler(reader, empty_partial_row))
current_parent = current_parent['properties'][name]
return current_parent

@@ -2174,16 +2186,18 @@ def _property_to_tabular(
elif isinstance(prop.dtype, Object):
for obj_prop in prop.dtype.properties.values():
yield_rows.append(obj_prop)
elif isinstance(prop.dtype, Text):
for lang_prop in prop.dtype.langs.values():
yield_rows.append(lang_prop)

elif prop.enum is not None:
data['ref'] = prop.given.enum
elif prop.unit is not None:
data['ref'] = prop.given.unit
data, prepare_rows = _prepare_to_tabular(data, prop)
if prop.given.name:
if prop.given.explicit:
yield torow(DATASET, data)
yield from prepare_rows
#yield from _text_to_tabular(prop)
yield from _comments_to_tabular(prop.comments, access=access)
yield from _lang_to_tabular(prop.lang)
yield from _enums_to_tabular(
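The `_empty_property` helper and the new `prop.given.explicit` check in `_property_to_tabular` work together: parents and language entries synthesized by the loader (rather than written in the manifest) are flagged `explicitly_given = False`, so only rows the user actually wrote are rendered back to tabular form, while `Text` language sub-properties are yielded separately. A small sketch of the flag's effect, using plain dicts in place of loaded `Property` objects:

    # Sketch: only explicitly given properties are rendered back to DSA rows.
    def _empty_property(data: dict) -> dict:    # same helper as in the diff
        data['explicitly_given'] = False
        return data

    explicit = {'type': 'string', 'explicitly_given': True}
    implicit = _empty_property({'type': 'array', 'items': {}})

    rows = [p for p in (explicit, implicit) if p.get('explicitly_given', True)]
    assert rows == [explicit]                   # the synthesized parent is skipped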
13 changes: 0 additions & 13 deletions spinta/nodes.py
@@ -5,23 +5,17 @@
from typing import Tuple
from typing import Type
from typing import Union
from typing import TypeVar
from typing import Iterator
from typing import NamedTuple
from typing import overload
from collections import defaultdict
from spinta import commands
from spinta import exceptions
from spinta.components import Component
from spinta.components import Config
from spinta.components import Context
from spinta.components import EntryId
from spinta.components import Node
from spinta.components import Model
from spinta.manifests.components import Manifest
from spinta.utils.schema import NA
from spinta.utils.schema import resolve_schema
from spinta.types.datatype import Ref


def get_node(
@@ -233,10 +227,3 @@ def load_model_properties(
prop = commands.load(context, prop, params, model.manifest)
model.properties[name] = prop
model.flatprops[name] = prop


class _SplitProp(NamedTuple):
type: str # text, object, array
name: str # base part of the name
tail: str # tail part of the name
data: str # property data dict
4 changes: 1 addition & 3 deletions spinta/types/model.py
@@ -259,8 +259,6 @@ def load(
prop, data = load_node(context, prop, data, mixed=True)
prop = cast(Property, prop)

if 'prepare_given' in data and data['prepare_given']:
prop.given.prepare = data['prepare_given']
parents = list(itertools.chain(
[prop.model, prop.model.ns],
prop.model.ns.parents(),
@@ -313,7 +311,7 @@ def load(
else:
prop.given.enum = unit
prop.given.name = prop.given_name if prop.given_name else prop.name
prop.given.explicit = prop.explicitly_given if prop.explicitly_given else True
prop.given.explicit = prop.explicitly_given if prop.explicitly_given is not None else True
return prop


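The small fix in `load()` matters because `explicitly_given` can now legitimately be `False` (for properties generated via `_empty_property`): the old truthiness check coerced that `False` back to `True`, while the new `is not None` check only applies the default when the value is genuinely unset. A plain-Python illustration of the two ternaries:

    explicitly_given = False   # e.g. a property synthesized by the loader

    old = explicitly_given if explicitly_given else True              # True  -- wrong
    new = explicitly_given if explicitly_given is not None else True  # False -- preserved

    assert old is True and new is False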
39 changes: 19 additions & 20 deletions tests/datasets/test_sql.py
@@ -2459,12 +2459,12 @@ def test_cast_string(
def test_type_text_push(postgresql, rc, cli: SpintaCliRunner, responses, tmpdir, geodb, request):
create_tabular_manifest(tmpdir / 'manifest.csv', striptable('''
d | r | b | m | property| type | ref | source | access
datasets/gov/example | | | |
datasets/gov/example/text_push | | | |
| data | sql | | |
| | | | | |
| | | Country | | code | salis |
| | | | code | string | | kodas | open
| | | | name@lt | text | | pavadinimas | open
| | | | name@lt | string | | pavadinimas | open
| | | | | |
| | | City | | name | miestas |
| | | | name | string | | pavadinimas | open
@@ -2482,19 +2482,18 @@ def test_type_text_push(postgresql, rc, cli: SpintaCliRunner, responses, tmpdir,
assert remote.url == 'https://example.com/'
result = cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_push',
'-o', remote.url,
'--credentials', remote.credsfile,
'--dry-run',
])
assert result.exit_code == 0

remote.app.authmodel('datasets/gov/example/Country', ['getall'])
resp = remote.app.get('/datasets/gov/example/Country')
remote.app.authmodel('datasets/gov/example/text_push/Country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text_push/Country')
assert listdata(resp, 'code', 'name') == []


@pytest.mark.skip('todo')
def test_text_type_push_chunks(
postgresql,
rc,
@@ -2506,13 +2505,13 @@ def test_text_type_push_chunks(
):
create_tabular_manifest(tmp_path / 'manifest.csv', striptable('''
d | r | b | m | property | source | type | ref | access
datasets/gov/example | | | |
datasets/gov/example/text_chunks | | | |
| data | | sql | |
| | | | | |
| | | country | salis | | code |
| | | | code | kodas | string | | open
| | | | name@lt | pavadinimas | text | | open
| | | | name@en | pavadinimas | text | | open
| | | | name@lt | pavadinimas | string | | open
| | | | name@en | pavadinimas | string | | open
'''))

# Configure local server with SQL backend
@@ -2525,22 +2524,22 @@ def test_text_type_push_chunks(
# Push data from local to remote.
cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_chunks',
'-o', 'spinta+' + remote.url,
'--credentials', remote.credsfile,
'--chunk-size=1',
])

cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text_chunks',
'-o', 'spinta+' + remote.url,
'--credentials', remote.credsfile,
'--chunk-size=1',
])

remote.app.authmodel('datasets/gov/example/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/country')
remote.app.authmodel('datasets/gov/example/text_chunks/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text_chunks/country')
assert listdata(resp, 'code', 'name') == [
('ee', 'Estija'),
('lt', 'Lietuva'),
@@ -2551,12 +2550,12 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, tmp_path, geodb, request):
create_tabular_manifest(tmp_path / 'manifest.csv', striptable('''
d | r | b | m | property | source | type | ref | access
datasets/gov/example | | | |
datasets/gov/example/text | | | |
| data | | sql | |
| | | | | |
| | | country | salis | | code |
| | | | code | kodas | string | | open
| | | | name@lt | pavadinimas | text | | open
| | | | name@lt | pavadinimas | string | | open
'''))

# Configure local server with SQL backend
@@ -2569,7 +2568,7 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
# Push one row, save state and stop.
cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text',
'-o', remote.url,
'--credentials', remote.credsfile,
'--chunk-size', '1k',
@@ -2578,20 +2577,20 @@ def test_text_type_push_state(postgresql, rc, cli: SpintaCliRunner, responses, t
'--state', tmp_path / 'state.db',
])

remote.app.authmodel('datasets/gov/example/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/country')
remote.app.authmodel('/datasets/gov/example/text/country', ['getall'])
resp = remote.app.get('/datasets/gov/example/text/country')
assert len(listdata(resp)) == 1

cli.invoke(localrc, [
'push',
'-d', 'datasets/gov/example',
'-d', 'datasets/gov/example/text',
'-o', remote.url,
'--credentials', remote.credsfile,
'--stop-row', '1',
'--state', tmp_path / 'state.db',
])

resp = remote.app.get('/datasets/gov/example/country')
resp = remote.app.get('/datasets/gov/example/text/country')
assert len(listdata(resp)) == 2


