Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: added optimizer integration test back #2519

Merged
merged 1 commit into from
May 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions jina/optimizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ def __call__(self, response):

:param response: response message
"""
self._n_docs += len(response.search.docs)
self._n_docs += len(response.data.docs)
logger.info(f'Num of docs evaluated: {self._n_docs}')
for doc in response.search.docs:
for doc in response.data.docs:
for evaluation in doc.evaluations:
self._evaluation_values[evaluation.op_name] += evaluation.value

Expand Down
31 changes: 11 additions & 20 deletions jina/optimizers/flow_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections.abc import Iterable
from typing import Optional, Union, List

from ..types.document.generators import from_lines
from ..flow import Flow
from ..helper import colored
from ..logging.predefined import default_logger as logger
Expand Down Expand Up @@ -39,46 +40,36 @@ def __init__(
flow_yaml: str,
documents: Union[Iterable, str],
request_size: int,
execution_method: str,
documents_parameter_name: Optional[str] = 'inputs',
execution_endpoint: str,
overwrite_workspace: bool = False,
):
"""
`documents` maps to a parameter of the `execution_method`, depending on the method.
`documents` maps to a parameter of the `execution_endpoint`, depending on the method.
If you use a generator function/list as `documents`, the default will work out of the box.
Otherwise, the following settings will work:

indexing + jsonlines file: `execution_methos='index_lines', documents_parameter_name='filepath'`
search + jsonlines file: `execution_methos='search_lines', documents_parameter_name='filepath'`

indexing + file pattern: `execution_methos='index_files', documents_parameter_name='pattern'`
search + file pattern: `execution_methos='search_files', documents_parameter_name='pattern'`

For more reasonable values, have a look at the :class:`Flow`.

:param flow_yaml: Path to Flow yaml
:param documents: Input parameter for `execution_method` for iterating documents.
:param documents: Input parameter for `execution_endpoint` for iterating documents.
(e.g. a list of documents for `index` or a .jsonlines file for `index_lines`)
:param request_size: Request size used in the flow
:param execution_method: One of the methods of the Jina :py:class:`Flow` (e.g. `index_lines`)
:param documents_parameter_name: The `documents` will be mapped to `documents_parameter_name` in the function `execution_function`.
See `jina/flow/__init__.py::Flow` for more details.
:param execution_endpoint: The endpoint that `f.post(on=...)` should point to.
:param overwrite_workspace: If True, the workspace created by the Flow is overwritten on each execution.
:raises TypeError: When the documents are neither a `str` nor an `Iterable`
"""
super().__init__()
self._flow_yaml = flow_yaml

if type(documents) is str:
self._documents = documents
self._documents = list(from_lines(filepath=documents))
elif isinstance(documents, Iterable):
self._documents = list(documents)
else:
raise TypeError(f"documents is of wrong type: {type(documents)}")

self._request_size = request_size
self._execution_method = execution_method
self._documents_parameter_name = documents_parameter_name
self._execution_endpoint = execution_endpoint
self._overwrite_workspace = overwrite_workspace

def _setup_workspace(self, workspace):
Expand Down Expand Up @@ -116,13 +107,13 @@ def run(
:param kwargs: keyword argument
"""
self._setup_workspace(workspace)
additional_arguments = {self._documents_parameter_name: self._documents}
additional_arguments.update(kwargs)
with Flow.load_config(self._flow_yaml, context=trial_parameters) as f:
getattr(f, self._execution_method)(
f.post(
inputs=self._documents,
on=self._execution_endpoint,
request_size=self._request_size,
on_done=callback,
**additional_arguments,
**kwargs,
)


Expand Down
Empty file.
10 changes: 10 additions & 0 deletions tests/integration/optimizers/data.jsonlines
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
{"document": {"mimeType": "text/plain", "text": "hello"}, "groundtruth": {"mimeType": "text/plain", "text": "hello"}}
9 changes: 9 additions & 0 deletions tests/integration/optimizers/flow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Flow used by the optimizer integration tests: a crafter pod followed by an
# evaluator pod.
!Flow
version: '1'
env:
# Each JINA_DUMMYCRAFTER_PARAMn placeholder is re-exposed under a *_VAR name;
# pods/craft.yml refers to the *_VAR names.
JINA_DUMMYCRAFTER_PARAM1_VAR: ${{JINA_DUMMYCRAFTER_PARAM1}}
JINA_DUMMYCRAFTER_PARAM2_VAR: ${{JINA_DUMMYCRAFTER_PARAM2}}
JINA_DUMMYCRAFTER_PARAM3_VAR: ${{JINA_DUMMYCRAFTER_PARAM3}}
pods:
# Pod configs are given as paths (presumably resolved relative to this file — confirm).
- uses: pods/craft.yml
- uses: pods/evaluate.yml
14 changes: 14 additions & 0 deletions tests/integration/optimizers/optimizer_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# FlowOptimizer configuration for the integration test. The ${{...}}
# placeholders (data file, parameter file, workspace dir) are expected to be
# supplied from outside this file.
!FlowOptimizer
version: '1'
with:
flow_runner: !SingleFlowRunner
with:
flow_yaml: flow.yml
overwrite_workspace: True
# Input documents; a string value here is treated as a path to a lines file.
documents: ${{JINA_OPTIMIZER_DATA_FILE}}
request_size: 1
# Endpoint name the runner posts the documents to.
execution_endpoint: 'search'
evaluation_callback: !MeanEvaluationCallback {}
parameter_yaml: ${{JINA_OPTIMIZER_PARAMETER_FILE}}
workspace_base_dir: ${{JINA_OPTIMIZER_WORKSPACE_DIR}}
n_trials: 5
18 changes: 18 additions & 0 deletions tests/integration/optimizers/parameter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Search space for the three DummyCrafter parameters. Each entry ties an
# optimizer parameter name to the variable used in flow.yml.
# param1: range 0..1 (bounds presumably inclusive — confirm).
- !IntegerParameter
jaml_variable: JINA_DUMMYCRAFTER_PARAM1
high: 1
low: 0
step_size: 1
parameter_name: param1
# param2: low equals high, so the only possible value is 1.
- !IntegerParameter
jaml_variable: JINA_DUMMYCRAFTER_PARAM2
high: 1
low: 1
step_size: 1
parameter_name: param2
# param3: range 1..2 (bounds presumably inclusive — confirm).
- !IntegerParameter
jaml_variable: JINA_DUMMYCRAFTER_PARAM3
high: 2
low: 1
step_size: 1
parameter_name: param3
7 changes: 7 additions & 0 deletions tests/integration/optimizers/pods/craft.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Pod config for the DummyCrafter executor.
!DummyCrafter
with:
# Constructor arguments param1..param3, filled from the *_VAR placeholders
# declared in the env section of flow.yml.
param1: ${{JINA_DUMMYCRAFTER_PARAM1_VAR}}
param2: ${{JINA_DUMMYCRAFTER_PARAM2_VAR}}
param3: ${{JINA_DUMMYCRAFTER_PARAM3_VAR}}
metas:
# Python module that defines the DummyCrafter class.
py_modules: 'dummy_crafter.py'
55 changes: 55 additions & 0 deletions tests/integration/optimizers/pods/dummy_crafter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Dict

from jina import Executor, requests

from jina.optimizers.parameters import IntegerParameter


class DummyCrafter(Executor):
    """Toy executor whose behaviour depends on three integer parameters.

    When ``param1``, ``param2`` and ``param3`` all equal the matching
    ``GOOD_PARAM_*`` class constants, documents pass through untouched.
    With any other combination the text of every document is replaced by
    an empty string.
    """

    # Integer ranges declared for each constructor parameter.
    # NOTE(review): presumably consumed by parameter discovery — confirm.
    DEFAULT_OPTIMIZATION_PARAMETER = [
        IntegerParameter(
            executor_name='DummyCrafter',
            parameter_name='param1',
            low=0,
            high=1,
            step_size=1,
        ),
        IntegerParameter(
            executor_name='DummyCrafter',
            parameter_name='param2',
            low=0,
            high=1,
            step_size=1,
        ),
        IntegerParameter(
            executor_name='DummyCrafter',
            parameter_name='param3',
            low=0,
            high=2,
            step_size=1,
        ),
    ]

    # The single parameter combination that leaves documents unchanged.
    GOOD_PARAM_1 = 0
    GOOD_PARAM_2 = 1
    GOOD_PARAM_3 = 1

    def __init__(self, param1: int, param2: int, param3: int, *args, **kwargs):
        """Store the three parameters.

        :param param1: first parameter, compared against ``GOOD_PARAM_1``
        :param param2: second parameter, compared against ``GOOD_PARAM_2``
        :param param3: third parameter, compared against ``GOOD_PARAM_3``
        :param args: positional arguments forwarded to the parent constructor
        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(*args, **kwargs)
        self.param1 = param1
        self.param2 = param2
        self.param3 = param3

    @property
    def good_params(self) -> bool:
        """Tell whether all three parameters equal their ``GOOD_PARAM_*`` value.

        :return: True only when every parameter matches its constant
        """
        return (
            self.param1 == DummyCrafter.GOOD_PARAM_1
            and self.param2 == DummyCrafter.GOOD_PARAM_2
            and self.param3 == DummyCrafter.GOOD_PARAM_3
        )

    @requests
    def craft(self, docs, *args, **kwargs) -> None:
        """Blank the text of every document unless the parameters are good.

        The documents are modified in place; nothing is returned.

        :param docs: iterable of documents exposing a writable ``text``
        :param args: unused positional arguments
        :param kwargs: unused keyword arguments
        """
        # The parameters cannot change while a request is processed, so the
        # check is done once instead of once per document.
        if self.good_params:
            return
        for doc in docs:
            doc.text = ''
17 changes: 17 additions & 0 deletions tests/integration/optimizers/pods/dummy_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from jina import Executor, requests, DocumentArray


class DummyTextEvaluator(Executor):
    """Toy evaluator that scores a document by exact text match with its groundtruth."""

    @property
    def metric(self) -> str:
        """Name of this evaluator.

        Note that :meth:`evaluate` does not use this value; it records its
        results under the fixed operation name ``'DummyScore'``.

        :return: the string ``'DummyTextEvaluator'``
        """
        return 'DummyTextEvaluator'

    @requests
    def evaluate(self, docs: 'DocumentArray', groundtruths: 'DocumentArray', **kwargs):
        """Attach a ``DummyScore`` evaluation to every document.

        Documents and groundtruths are paired positionally with ``zip``, so
        any surplus items in the longer sequence are ignored.

        :param docs: documents to score; each gets one new evaluation entry
        :param groundtruths: expected documents, matched to ``docs`` by position
        :param kwargs: unused keyword arguments
        """
        for doc, groundtruth in zip(docs, groundtruths):
            evaluation = doc.evaluations.add()
            evaluation.op_name = 'DummyScore'
            # 1.0 for an exact text match, 0.0 otherwise.
            evaluation.value = 1.0 if doc.text == groundtruth.text else 0.0
3 changes: 3 additions & 0 deletions tests/integration/optimizers/pods/evaluate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Pod config for the DummyTextEvaluator executor.
!DummyTextEvaluator
metas:
# Python module that defines the DummyTextEvaluator class.
py_modules: 'dummy_evaluate.py'
18 changes: 18 additions & 0 deletions tests/integration/optimizers/test_discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from distutils.dir_util import copy_tree
from shutil import copy2

from jina.optimizers.discovery import run_parameter_discovery

# Absolute path of the directory holding this test module; the fixture files
# (flow.yml, pods/) are looked up relative to it.
cur_dir = os.path.dirname(os.path.abspath(__file__))


def test_discovery(tmpdir):
    """Run parameter discovery on a copy of the test flow and expect a result file.

    The flow definition and its ``pods`` directory are copied into ``tmpdir``
    so that discovery works on the copy; the test passes when
    ``parameter.yml`` exists in ``tmpdir`` afterwards.

    :param tmpdir: temporary directory provided by pytest
    """
    flow_copy = os.path.join(tmpdir, 'flow.yml')
    result_path = os.path.join(tmpdir, 'parameter.yml')

    # Stage the fixtures next to each other inside the temporary directory.
    copy2(os.path.join(cur_dir, 'flow.yml'), tmpdir)
    copy_tree(os.path.join(cur_dir, 'pods'), os.path.join(tmpdir, 'pods'))

    run_parameter_discovery([flow_copy], result_path, True)

    assert os.path.exists(result_path)
Loading