From 055c0ea810651e16369237b9822187fb97f7a735 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 6 Sep 2019 15:21:25 +0300 Subject: [PATCH 001/133] Initial commit, async agent, pipeline class, connectors and state manager changes --- config.py | 2 + core/agent.py | 186 +++++++++++++------- core/config_parser.py | 130 ++++++++++++++ core/connectors.py | 40 +++++ core/pipeline.py | 93 ++++++++++ core/rest_caller.py | 58 ------ core/run.py | 281 ++++-------------------------- core/skill_manager.py | 102 ----------- core/state_manager.py | 162 +++++++++-------- state_formatters/dp_formatters.py | 8 +- 10 files changed, 503 insertions(+), 559 deletions(-) create mode 100644 core/config_parser.py create mode 100644 core/connectors.py create mode 100644 core/pipeline.py delete mode 100644 core/rest_caller.py delete mode 100644 core/skill_manager.py diff --git a/config.py b/config.py index d320b275..77394bcd 100644 --- a/config.py +++ b/config.py @@ -92,3 +92,5 @@ RESPONSE_SELECTORS = [] POSTPROCESSORS = [] + +DEBUG = True diff --git a/core/agent.py b/core/agent.py index 9d057e95..749ee8cc 100644 --- a/core/agent.py +++ b/core/agent.py @@ -1,68 +1,128 @@ -from datetime import datetime -from typing import Sequence, Hashable, Any, Callable, List, Dict -from itertools import compress -import operator +import asyncio -from core.state_manager import StateManager, get_state -from core.skill_manager import SkillManager -from models.hardcode_utterances import TG_START_UTT -from core.state_schema import Dialog, Human +from collections import defaultdict +from datetime import datetime +from time import time +from typing import Any, Optional, Callable, Hashable -Profile = Dict[str, Any] +from core.pipeline import Pipeline +from core.state_manager import StateManager +from core.state_schema import Dialog -class Agent: - def __init__(self, state_manager: StateManager, preprocessors: List[Callable], - postprocessor: Callable, - skill_manager: SkillManager) -> None: +class AsyncAgent: + def __init__(self, pipeline: Pipeline, state_manager: StateManager, + process_logger_callable: Optional[Callable] = None, + response_logger_callable: Optional[Callable] = None): + self.workflow = dict() + self.pipeline = pipeline self.state_manager = state_manager - self.preprocessors = preprocessors - self.postprocessor = postprocessor - self.skill_manager = skill_manager - - def __call__(self, utterances: Sequence[str], user_telegram_ids: Sequence[Hashable], - user_device_types: Sequence[Any], - date_times: Sequence[datetime], locations=Sequence[Any], - channel_types=Sequence[str]): - should_reset = [utterance == TG_START_UTT for utterance in utterances] - # here and further me stands for mongoengine - me_users = self.state_manager.get_or_create_users(user_telegram_ids, user_device_types) - me_dialogs = self.state_manager.get_or_create_dialogs(me_users, locations, channel_types, - should_reset) - self.state_manager.add_human_utterances(me_dialogs, utterances, date_times) - informative_dialogs = list(compress(me_dialogs, map(operator.not_, should_reset))) - self._update_annotations(informative_dialogs) - - selected_skills = self.skill_manager.get_skill_responses(me_dialogs) - self._update_utterances(me_dialogs, selected_skills, key='selected_skills') - - skill_names, responses, confidences, profiles = self.skill_manager(me_dialogs) - self._update_profiles(me_users, profiles) - - self.state_manager.add_bot_utterances(me_dialogs, responses, responses, - [datetime.utcnow()] * len(me_dialogs), - skill_names, confidences) - - 
sent_responses = self.postprocessor(me_dialogs) - self._update_utterances(me_dialogs, sent_responses, key='text') - self._update_annotations(me_dialogs) - - return sent_responses # return text only to the users - - def _update_annotations(self, me_dialogs: Sequence[Dialog]) -> None: - for prep in self.preprocessors: - annotations = prep(get_state(me_dialogs)) - utterances = [dialog.utterances[-1] for dialog in me_dialogs] - self.state_manager.add_annotations(utterances, annotations) - - def _update_profiles(self, me_users: Sequence[Human], profiles: List[Profile]) -> None: - for me_user, profile in zip(me_users, profiles): - if any(profile.values()): - self.state_manager.update_user_profile(me_user, profile) - - def _update_utterances(self, me_dialogs: Sequence[Dialog], values: Sequence[Any], - key: str) -> None: - if values: - utterances = [dialog.utterances[-1] for dialog in me_dialogs] - for utt, val in zip(utterances, values): - self.state_manager.update_me_object(utt, {key: val}) + self.process_logger_callable = process_logger_callable + self.response_logger_callable = response_logger_callable + + def add_workflow_record(self, dialog: Dialog): + if dialog.id in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog.id} is already in workflow') + self.workflow[dialog.id] = {'dialog': dialog, 'services': defaultdict(dict)} + + def get_workflow_record(self, dialog_id): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + return self.workflow[dialog_id] + + def flush_record(self, dialog_id: str): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + return self.workflow.pop(dialog_id) + + def save_in_workflow(self, dialog_id: str, key: str, value: Any): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + self.workflow[dialog_id][key] = value + + def get_from_workflow(self, dialog_id: str, key: str): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + if dialog_id not in self.workflow[dialog_id].keys(): + raise ValueError(f'Item with key {key} is not exist in dialog {dialog_id}') + return self.workflow[dialog_id][key] + + def register_service_request(self, dialog_id: str, service_name): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None} + + def get_services_status(self, dialog_id: str): + if dialog_id not in self.workflow.keys(): + raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + done, waiting = set(), set() + for key, value in self.workflow[dialog_id]['services'].items(): + if value['done'] is not None: + done.add(key) + else: + waiting.add(key) + + return done, waiting + + def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): + ''' + Ultimate method, which performs next operations: + 1. Updates workflow dict with completed service + 2. Updates dialog within workflow dict, using service update callable + 3. Asks pipeline for next services which became available on current stage workflow + 4. Modifies next services when processed responce is received from a selector service + 5. 
Returns the list of next services to be called
+        '''
+        workflow_record = self.get_workflow_record(dialog_id)
+
+        # Updating workflow with service response
+        service = self.pipeline.get_service_by_name(service_name)
+        if service:
+            self.workflow[dialog_id]['services'][service_name]['done'] = time()
+            if response:
+                service.state_processor_method(workflow_record['dialog'], response)
+
+        # Calculating next steps
+        done, waiting = self.get_services_status(dialog_id)
+        next_services = self.pipeline.get_next_services(done, waiting)
+
+        # Processing the case when the responding service is a skill selector
+        if service and service.is_selector():
+            selected_services = list(response.values())[0]
+            result = []
+            for service in next_services:
+                if service.name not in selected_services:
+                    self.workflow[dialog_id]['services'][service.name] = {'done': time(), 'send': None}
+                else:
+                    result.append(service)
+            next_services = result
+        if self.process_logger_callable:
+            self.process_logger_callable(self.workflow[dialog_id])  # send dialog workflow record to further logging operations
+
+        return next_services
+
+    async def register_msg(self, utterance: str, user_telegram_id: Hashable,
+                           user_device_type: Any,
+                           date_time: datetime, location=Any,
+                           channel_type=str, deadline=None):
+
+        user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type)
+        dialog = self.state_manager.get_or_create_dialog(user, location, channel_type)
+        self.state_manager.add_human_utterance(dialog, utterance, date_time)
+        self.add_workflow_record(dialog)
+
+        await self.process(dialog.id)
+
+    async def process(self, dialog_id, service_name=None, response=None):
+        workflow_record = self.get_workflow_record(dialog_id)
+        next_services = self.process_service_response(dialog_id, service_name, response)
+
+        for service in next_services:
+            self.register_service_request(dialog_id, service.name)
+            payload = service.apply_workflow_formatter(workflow_record)
+            response = await service.connector.send(payload)
+            if service.is_responder():
+                self.flush_record(dialog_id)
+                break
+            if response is not None:
+                await self.process(dialog_id, service.name, response)
diff --git a/core/config_parser.py b/core/config_parser.py
new file mode 100644
index 00000000..e31dc2c7
--- /dev/null
+++ b/core/config_parser.py
@@ -0,0 +1,130 @@
+import aiohttp
+
+from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, RESPONSE_SELECTORS, POSTPROCESSORS
+from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector
+from core.pipeline import Service, simple_workflow_formatter
+from core.state_manager import StateManager
+
+
+services = []
+worker_tasks = []
+
+
+def make_service_from_config_rec(log_record, session, state_processor_method, tags, names_previous_services, name_modifier=None):
+    worker_task = None
+    if name_modifier:
+        name = name_modifier(log_record['name'])
+    else:
+        name = log_record['name']
+    formatter = log_record['formatter']
+    batch_size = log_record.get('batch_size', 1)
+    if log_record['protocol'] == 'http':
+        if log_record.get('external', False):
+            url = f"http://{log_record['host']}:{log_record['port']}/{log_record['endpoint']}"
+        else:
+            url = f"http://{log_record['name']}:{log_record['port']}/{log_record['endpoint']}"
+        if batch_size == 1:
+            connector = HTTPConnector(session, url, formatter, log_record['name'])
+        else:
+            pass  # worker task and queue connector
+
+    service = Service(name, connector, state_processor_method, batch_size,
+                      tags, names_previous_services, simple_workflow_formatter)
+    
return service, worker_task + + +def add_bot_to_name(name): + return f'bot_{name}' + + +session = aiohttp.ClientSession() + +for anno in ANNOTATORS_1: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['ANNOTATORS_1'], set()) + services.append(service) + worker_tasks.append(worker_task) + +previous_services = {i.name for i in services if 'ANNOTATORS_1' in i.tags} + +if ANNOTATORS_2: + for anno in ANNOTATORS_2: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['ANNOTATORS_2'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'ANNOTATORS_2' in i.tags} + +if ANNOTATORS_3: + for anno in ANNOTATORS_3: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['ANNOTATORS_3'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'ANNOTATORS_3' in i.tags} + +if SKILL_SELECTORS: + for ss in SKILL_SELECTORS: + service, worker_task = make_service_from_config_rec(ss, session, StateManager.do_nothing, + ['SKILL_SELECTORS', 'selector'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'SKILL_SELECTORS' in i.tags} + +if SKILLS: + for s in SKILLS: + service, worker_task = make_service_from_config_rec(s, session, StateManager.add_selected_skill, + ['SKILLS'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'SKILLS' in i.tags} + +if not RESPONSE_SELECTORS: + services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector(), + StateManager.add_bot_utterance_simple, + 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) +else: + for r in RESPONSE_SELECTORS: + service, worker_task = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple, + ['RESPONSE_SELECTORS'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + +previous_services = {i.name for i in services if 'RESPONSE_SELECTORS' in i.tags} + +if POSTPROCESSORS: + for p in POSTPROCESSORS: + service, worker_task = make_service_from_config_rec(p, session, StateManager.add_text, + ['POSTPROCESSORS'], previous_services) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'POSTPROCESSORS' in i.tags} + +if ANNOTATORS_1: + for anno in ANNOTATORS_1: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['POST_ANNOTATORS_1'], previous_services, add_bot_to_name) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'POST_ANNOTATORS_1' in i.tags} + +if ANNOTATORS_2: + for anno in ANNOTATORS_2: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['POST_ANNOTATORS_2'], previous_services, add_bot_to_name) + services.append(service) + worker_tasks.append(worker_task) + + previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} + +for anno in ANNOTATORS_3: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_3'], + previous_services, add_bot_to_name) + services.append(service) + 
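# worker_task is always None in this commit: the queue-based batch path is still a stub
+    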
worker_tasks.append(worker_task) diff --git a/core/connectors.py b/core/connectors.py new file mode 100644 index 00000000..1fa0a960 --- /dev/null +++ b/core/connectors.py @@ -0,0 +1,40 @@ +import asyncio +import aiohttp +from typing import Dict, List + +''' +class AioQueueConnector: + def __init__(self, queue): + self.queue = queue + await self.queue.join() + + async def send(self, payload: Dict): + await self.queue.put(payload) +''' + +class HTTPConnector: + def __init__(self, session, url, formatter, name): + self.session = session + self.url = url + self.formatter = formatter + self.name = name + + async def send(self, payload: Dict): + async with self.session.post(self.url, json=self.formatter([payload])) as resp: + response = await resp.json() + return {self.name: self.formatter(response[0], mode='out')} + + +class CmdConnector: + async def send(self, payload): + print('bot: ', payload['utterances'][-1]['text']) + + +class ConfidenceResponseSelectorConnector: + """Select a single response for each dialog turn WITHOUT MAGIC METHODS. + """ + async def send(self, payload: Dict): + skill_name = '' + response = payload['utterances'][-1]['selected_skills'] + skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] + return {'confidence_response_selector': skill_name} diff --git a/core/pipeline.py b/core/pipeline.py new file mode 100644 index 00000000..3afb7294 --- /dev/null +++ b/core/pipeline.py @@ -0,0 +1,93 @@ +from collections import defaultdict + + +class Service: + def __init__(self, name, connector, state_processor_method, + batch_size=1, tags=None, names_previous_services=None, workflow_formatter=None): + self.name = name + self.batch_size = batch_size + self.connector = connector + self.state_processor_method = state_processor_method + self.names_previous_services = names_previous_services or set() + self.tags = tags or [] + self.workflow_formatter = workflow_formatter + self.previous_services = set() + self.next_services = set() + + def is_selector(self): + return 'selector' in self.tags + + def is_responder(self): + return 'responder' in self.tags + + def apply_workflow_formatter(self, workflow_record): + if not self.workflow_formatter: + return workflow_record + return self.workflow_formatter(workflow_record) + + def __repr__(self): + return self.name + + +class Pipeline: + def __init__(self, services): + service_names = [i.name for i in services] + counter = defaultdict(int) + for i in service_names: + counter[i] += 1 + wrong_names = [k for k, v in counter.items() if v != 1] + if wrong_names: + raise ValueError(f'there are some duplicate service names presented {wrong_names}') + + self.services = {i.name: i for i in services} + wrong_links = self.process_service_names() + if wrong_links: + print('wrong links in config were detected: ', dict(wrong_links)) + + def get_service_by_name(self, service_name): + if not service_name: + return None + if service_name not in self.services: + raise ValueError(f'service {service_name} does not exist') + return self.services[service_name] + + def process_service_names(self): + wrong_names = defaultdict(list) + for service in self.services.values(): + for name_prev_service in service.names_previous_services: + if name_prev_service not in self.services: + wrong_names[service.name].append(name_prev_service) + continue + service.previous_services.add(self.services[name_prev_service]) + self.services[name_prev_service].next_services.add(service) + return wrong_names # wrong names means that some service_names, 
used in previous services don't exist + + def get_next_services(self, done=None, waiting=None): + if not done: + done = set() + if not waiting: + waiting = set() + removed_names = waiting | done + for name, service in self.services.items(): + if not {i.name for i in service.previous_services} <= done: + removed_names.add(name) + + return [service for name, service in self.services.items() if name not in removed_names] + + def get_endpoint_services(self): + return [s for s in self.services.values() if not s.next_services and 'responder' not in s.tags] + + def add_responder_service(self, service): + if 'responder' not in service.tags: + raise ValueError('service should be a responder') + endpoints = self.get_endpoint_services() + service.previous_services = set(endpoints) + service.previous_service_names = {s.name for s in endpoints} + self.services[service.name] = service + + for s in endpoints: + self.services[s.name].next_services.add(service) + + +def simple_workflow_formatter(workflow_record): + return workflow_record['dialog'].to_dict() diff --git a/core/rest_caller.py b/core/rest_caller.py deleted file mode 100644 index e733bcae..00000000 --- a/core/rest_caller.py +++ /dev/null @@ -1,58 +0,0 @@ -from concurrent.futures import ThreadPoolExecutor -from typing import Dict, List, Any, Optional, Sequence, Union, Callable - -import requests - -from core.transform_config import MAX_WORKERS - - -def _make_request(name, url, formatter, payload): - r = requests.post(url, json=formatter(payload)) - if r.status_code != 200: - raise RuntimeError(f'Got {r.status_code} status code for {url}') - return [{name: formatter(response, mode='out')} for response in r.json()] - - -class RestCaller: - """ - Call to REST services, annotations or skills. - """ - - def __init__(self, max_workers: int = MAX_WORKERS, - names: Optional[Sequence[str]] = None, - urls: Optional[Sequence[str]] = None, - formatters = None) -> None: - self.names = tuple(names or ()) - self.urls = tuple(urls or ()) - self.executor = ThreadPoolExecutor(max_workers=max_workers) - self.formatters = formatters - - def __call__(self, payload: Union[Dict, Sequence[Dict]], - names: Optional[Sequence[str]] = None, - urls: Optional[Sequence[str]] = None, - formatters: List[Callable] = None) -> List[Dict[str, Dict[str, Any]]]: - - names = names if names is not None else self.names - urls = urls if urls is not None else self.urls - formatters = formatters if formatters is not None else self.formatters - - if names is None: - raise ValueError('No service names were provided.') - if urls is None: - raise ValueError('No service urls were provided') - if formatters is None: - raise ValueError('No state formatters were provided.') - - if not isinstance(payload, Sequence): - payload = [payload] * len(names) - - total_result = [] - for preprocessed in zip(*self.executor.map(_make_request, names, urls, formatters, - payload)): - res = {} - for data in preprocessed: - res.update(data) - - total_result.append(res) - - return total_result diff --git a/core/run.py b/core/run.py index 0a35a5a4..50e5f067 100644 --- a/core/run.py +++ b/core/run.py @@ -1,261 +1,44 @@ -import argparse -import time -from os import getenv +import asyncio +import aiohttp -from aiohttp import web from datetime import datetime -from string import hexdigits -from threading import Thread -from multiprocessing import Process, Pipe -from multiprocessing.connection import Connection -from typing import Callable, Optional, Collection, Hashable, List, Tuple +from core.agent import AsyncAgent 
+from core.pipeline import Pipeline, Service, simple_workflow_formatter +from core.connectors import CmdConnector +from core.config_parser import services, worker_tasks, session +from core.state_manager import StateManager +from core.transform_config import DEBUG -import telebot -from telebot.types import Message, Location, User +endpoint = Service('cmd_responder', CmdConnector(), None, 1, ['responder'], set(), simple_workflow_formatter) -parser = argparse.ArgumentParser() -parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, - choices=['telegram', 'cmd_client', 'http_client'], default='cmd_client') -parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) -args = parser.parse_args() -CHANNEL = args.channel +def prepare_agent(): + pipeline = Pipeline(services) + pipeline.add_responder_service(endpoint) -def _model_process(model_function: Callable, conn: Connection, batch_size: int = -1, *, - poll_period: float = 0.5): - model: Callable[[Collection[str], Collection[Hashable]], Collection[str]] = model_function() - if batch_size <= 0: - batch_size = float('inf') + agent = AsyncAgent(pipeline, StateManager) - check_time = time.time() + return agent.register_msg - while True: - batch: List[Tuple[str, Hashable]] = [] - while conn.poll() and len(batch) < batch_size: - batch.append(conn.recv()) - if time.time() - check_time >= poll_period: - break - - if not batch: - continue - - messages, dialog_ids = zip(*batch) - responses = model(messages, dialog_ids) - for response, dialog_id in zip(responses, dialog_ids): - conn.send((response, dialog_id)) - check_time = time.time() # maybe it should be moved before model call - - -def experimental_bot( - model_function: Callable[ - ..., Callable[[Collection[Message], Collection[Hashable]], Collection[str]]], *, - batch_size: int = -1, poll_period: float = 0.5): - """ - - Args: - model_function: a function that produces an agent - token: telegram token string - proxy: https or socks5 proxy string for telebot - batch_size: maximum batch size for the model - poll_period: how long to wait every time no input was done for the model - - Returns: None - - """ - - token = getenv('TELEGRAM_TOKEN') - proxy = getenv('TELEGRAM_PROXY') - - if proxy is not None: - telebot.apihelper.proxy = {'https': proxy} - - bot = telebot.TeleBot(token) - - parent_conn, child_conn = Pipe() - p = Process(target=_model_process, args=(model_function, child_conn), - kwargs={'batch_size': batch_size, 'poll_period': poll_period}) - p.start() - - def responder(): - while True: - text, chat_id = parent_conn.recv() - bot.send_message(chat_id, text) - - t = Thread(target=responder) - t.start() - - @bot.message_handler() - def handle_message(message: Message): - parent_conn.send((message, message.chat.id)) - - bot.polling(none_stop=True) - - -def run(): - from core.agent import Agent - from core.state_manager import StateManager - from core.skill_manager import SkillManager - from core.rest_caller import RestCaller - from models.postprocessor import DefaultPostprocessor - from models.response_selector import ConfidenceResponseSelector - from core.transform_config import MAX_WORKERS, ANNOTATORS, SKILL_SELECTORS, SKILLS, RESPONSE_SELECTORS - - import logging - - logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) - - state_manager = StateManager() - - preprocessors = [] - for ants in ANNOTATORS: - if ants: - anno_names, anno_urls, anno_formatters = zip( - *[(a['name'], 
a['url'], a['formatter']) for a in ants]) - else: - anno_names, anno_urls, anno_formatters = [], [], [] - preprocessors.append(RestCaller(max_workers=MAX_WORKERS, names=anno_names, urls=anno_urls, - formatters=anno_formatters)) - - postprocessor = DefaultPostprocessor() - skill_caller = RestCaller(max_workers=MAX_WORKERS) - - if RESPONSE_SELECTORS: - rs_names, rs_urls, rs_formatters = zip( - *[(rs['name'], rs['url'], rs['formatter']) for rs in RESPONSE_SELECTORS]) - response_selector = RestCaller(max_workers=MAX_WORKERS, names=rs_names, urls=rs_urls, - formatters=rs_formatters) - else: - response_selector = ConfidenceResponseSelector() - skill_selector = None - if SKILL_SELECTORS: - ss_names, ss_urls, ss_formatters = zip( - *[(ss['name'], ss['url'], ss['formatter']) for ss in SKILL_SELECTORS]) - skill_selector = RestCaller(max_workers=MAX_WORKERS, names=ss_names, urls=ss_urls, - formatters=ss_formatters) - - skill_manager = SkillManager(skill_selector=skill_selector, response_selector=response_selector, - skill_caller=skill_caller, - profile_handlers=[skill['name'] for skill in SKILLS - if skill.get('profile_handler')]) - - agent = Agent(state_manager, preprocessors, postprocessor, skill_manager) - - def infer_telegram(messages: Collection[Message], dialog_ids): - utterances: List[Optional[str]] = [message.text for message in messages] - tg_users: List[User] = [message.from_user for message in messages] - - u_tg_ids = [str(user.id) for user in tg_users] - u_tg_data = [{ - 'id': user.id, - 'username': user.username, - 'first_name': user.first_name, - 'last_name': user.last_name - } - for user in tg_users] - - u_d_types = [None] * len(messages) - date_times = [datetime.utcnow()] * len(messages) - locations: List[Optional[Location]] = [message.location for message in messages] - ch_types = ['telegram'] * len(messages) - - answers = agent(utterances=utterances, user_telegram_ids=u_tg_ids, - user_device_types=u_d_types, - date_times=date_times, locations=locations, channel_types=ch_types) - return answers - - def infer_cmd(messages, dialog_ids): - utterances: List[Optional[str]] = [message['data'] for message in messages] - u_ids = [str(message['from_user']['id']) for message in messages] - - date_times = [datetime.utcnow()] * len(messages) - locations: List[Optional[Location]] = [None] * len(messages) - - answers = agent(utterances=utterances, user_telegram_ids=u_ids, - user_device_types=[None] * len(messages), - date_times=date_times, locations=locations, - channel_types=['cmd_client'] * len(messages)) - return answers - - if CHANNEL == 'telegram': - return infer_telegram - else: - return infer_cmd - - -async def init_app(): - app = web.Application() - handle_func = await api_message_processor(run()) - app.router.add_post('/', handle_func) - app.router.add_get('/dialogs', users_dialogs) - app.router.add_get('/dialogs/{dialog_id}', dialog) - return app - - -async def api_message_processor(message_processor): - async def api_handle(request): - result = {} - if request.method == 'POST': - if request.headers.get('content-type') != 'application/json': - raise web.HTTPBadRequest(reason='Content-Type should be application/json') - data = await request.json() - user_id = data.get('user_id') - payload = data.get('payload', '') - - if not user_id: - raise web.HTTPBadRequest(reason='user_id key is required') - - message = {'data': payload, 'from_user': {'id': user_id}} - responses = message_processor([message], [1]) - result = {'user_id': user_id, 'response': responses[0]} - return 
web.json_response(result) - - return api_handle - - -async def users_dialogs(request): - from core.state_schema import Dialog - exist_dialogs = Dialog.objects() - result = list() - for i in exist_dialogs: - result.append( - {'id': str(i.id), 'location': i.location, 'channel_type': i.channel_type, 'user': i.user.to_dict()}) - return web.json_response(result) - - -async def dialog(request): - from core.state_schema import Dialog - dialog_id = request.match_info['dialog_id'] - if dialog_id == 'all': - dialogs = Dialog.objects() - return web.json_response([i.to_dict() for i in dialogs]) - elif len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id): - dialog = Dialog.objects(id__exact=dialog_id) - if not dialog: - raise web.HTTPNotFound(reason=f'dialog with id {dialog_id} is not exist') - else: - return web.json_response(dialog[0].to_dict()) - else: - raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') - - -def main(): - if CHANNEL == 'telegram': - experimental_bot(run) - elif CHANNEL == 'cmd_client': - message_processor = run() - user_id = input('Provide user id: ') - user = {'id': user_id} - while True: - msg = input(f'You ({user_id}): ').strip() - if msg: - message = {'data': msg, 'from_user': user} - responses = message_processor([message], [1]) - print('Bot: ', responses[0]) - elif CHANNEL == 'http_client': - app = init_app() - web.run_app(app, port=args.port) +async def run(): + register_func = prepare_agent() + user_id = input('Provide user id: ') + while True: + msg = input(f'You ({user_id}): ').strip() + if msg: + await register_func(msg, user_id, 'cmd', datetime.now(), 'lab', 'cmd_client') if __name__ == '__main__': - main() + loop = asyncio.get_event_loop() + loop.set_debug(DEBUG) + future = asyncio.ensure_future(run()) + try: + loop.run_until_complete(future) + except Exception as e: + raise e + finally: + loop.run_until_complete(asyncio.gather(session.close())) + loop.stop() + loop.close() diff --git a/core/skill_manager.py b/core/skill_manager.py deleted file mode 100644 index 5aeef1f8..00000000 --- a/core/skill_manager.py +++ /dev/null @@ -1,102 +0,0 @@ -import copy -import operator -from itertools import compress -from typing import List, Dict, Optional, Sequence -from warnings import warn - -from core.transform_config import MAX_WORKERS, SKILLS -from models.hardcode_utterances import NOANSWER_UTT -from core.state_schema import Human -from core.state_manager import get_state - - -class SkillManager: - - def __init__(self, response_selector, skill_caller, skill_selector=None, profile_handlers: Sequence[str] = ()): - """ - - Args: - response_selector: - skill_caller: - skill_selector: - profile_handlers: list of skill names for getting user profiles info ordered from most to least important - names not in ``self.skill_names`` are ignored - """ - self.skill_selector = skill_selector - self.response_selector = response_selector - self.max_workers = MAX_WORKERS - self.skill_caller = skill_caller - self.skills = SKILLS - self.skill_names = [s['name'] for s in self.skills] - self.profile_handlers = [name for name in reversed(profile_handlers) if name in self.skill_names] - self.profile_fields = list(Human.profile.default.keys()) - - def __call__(self, dialogs): - - skill_responses = [d.utterances[-1]['selected_skills'] for d in dialogs] - user_profiles = self._get_user_profiles(skill_responses) - rs_response = self.response_selector(get_state(dialogs)) - # should be a flatten list because there is always only one ResponseSelector: - 
selected_skill_names = list(v for d in rs_response for _, v in d.items()) - utterances = [] - confidences = [] - for responses, selected_name in zip(skill_responses, selected_skill_names): - selected_skill = responses[selected_name] - utterances.append(selected_skill['text'] or NOANSWER_UTT) - confidences.append(selected_skill['confidence']) - return selected_skill_names, utterances, confidences, user_profiles - - def _get_user_profiles(self, skill_responses) -> Optional[List[Dict]]: - """ - Get user profile descriptors from compatible skills. - """ - user_profiles = [] - for sr in skill_responses: - profile = {} - for item in self.profile_fields: - for skill in self.profile_handlers: - try: - profile[item] = sr[skill][item] - except KeyError: - pass - user_profiles.append(profile) - return user_profiles - - def get_skill_responses(self, dialogs): - n_dialogs = len(dialogs) - skill_names = [s['name'] for s in self.skills] - skill_urls = [s['url'] for s in self.skills] - skill_formatters = [s['formatter'] for s in self.skills] - - state = get_state(dialogs) - if self.skill_selector is not None: - selected_skills = [list(d.values())[0] for d in self.skill_selector(state)] - for i, skills in enumerate(selected_skills): - for s in skills: - if s not in skill_names: - warn(f'SkillSelector has returned a non-existent skill name "{s}".', - stacklevel=2) - skills.remove(s) - if not skills: - selected_skills[i] = skill_names - else: - selected_skills = [skill_names] * n_dialogs - excluded_skills = [] - for active_names in selected_skills: - excluded_skills.append([n not in active_names for n in skill_names]) - excluded_skills = list(map(list, zip(*excluded_skills))) - - payloads = [] - for exclude, skill in zip(excluded_skills, self.skills): - s = copy.deepcopy(state) - compressed_dialogs = list(compress(s['dialogs'], map(operator.not_, exclude))) - if not compressed_dialogs: - skill_names.remove(skill['name']) - skill_urls.remove(skill['url']) - skill_formatters.remove(skill['formatter']) - continue - s['dialogs'] = compressed_dialogs - payloads.append(s) - skill_responses = self.skill_caller(payload=payloads, names=skill_names, urls=skill_urls, - formatters=skill_formatters) - return skill_responses diff --git a/core/state_manager.py b/core/state_manager.py index 74b3af15..5a132c70 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -7,86 +7,8 @@ from core import VERSION -def get_state(dialogs: Sequence[Dialog]): - state = {'version': VERSION, 'dialogs': []} - for d in dialogs: - state['dialogs'].append(d.to_dict()) - return state - - class StateManager: - @classmethod - def get_or_create_users(cls, user_telegram_ids=Sequence[Hashable], user_device_types=Sequence[Any]): - users = [] - for user_telegram_id, device_type in zip(user_telegram_ids, user_device_types): - user_query = Human.objects(user_telegram_id__exact=user_telegram_id) - if not user_query: - user = cls.create_new_human(user_telegram_id, device_type) - else: - user = user_query[0] - users.append(user) - return users - - @classmethod - def get_or_create_dialogs(cls, users, locations, channel_types, should_reset): - dialogs = [] - for user, loc, channel_type, reset in zip(users, locations, channel_types, should_reset): - if reset: - dialog = cls.create_new_dialog(user=user, - bot=BOT, - location=loc, - channel_type=channel_type) - else: - exist_dialogs = Dialog.objects(user__exact=user) - if not exist_dialogs: - # TODO remove this "if" condition: it should never happen in production, only while testing - dialog = 
cls.create_new_dialog(user=user, - bot=BOT, - location=loc, - channel_type=channel_type) - else: - dialog = exist_dialogs[0] - - dialogs.append(dialog) - return dialogs - - @classmethod - def add_human_utterances(cls, dialogs: Sequence[Dialog], texts: Sequence[str], date_times: Sequence[datetime], - annotations: Optional[Sequence[dict]] = None, - selected_skills: Optional[Sequence[dict]] = None) -> None: - if annotations is None: - annotations = [None] * len(texts) - - if selected_skills is None: - selected_skills = [None] * len(texts) - - for dialog, text, anno, date_time, ss in zip(dialogs, texts, annotations, date_times, selected_skills): - utterance = cls.create_new_human_utterance(text, dialog.user, date_time, anno, ss) - dialog.utterances.append(utterance) - dialog.save() - - @classmethod - def add_bot_utterances(cls, dialogs: Sequence[Dialog], orig_texts: Sequence[str], texts: Sequence[str], - date_times: Sequence[datetime], active_skills: Sequence[str], - confidences: Sequence[float], annotations: Optional[Sequence[dict]] = None) -> None: - if annotations is None: - annotations = [None] * len(dialogs) - - for dialog, orig_text, text, date_time, active_skill, confidence, anno in zip(dialogs, orig_texts, texts, - date_times, active_skills, - confidences, annotations): - utterance = cls.create_new_bot_utterance(orig_text, text, dialog.bot, date_time, active_skill, confidence, - anno) - dialog.utterances.append(utterance) - dialog.save() - - @staticmethod - def add_annotations(utterances: Sequence[Utterance], annotations: Sequence[Dict]): - for utt, ann in zip(utterances, annotations): - utt.annotations.update(ann) - utt.save() - @staticmethod def create_new_dialog(user, bot, location=None, channel_type=None): dialog = Dialog(user=user, @@ -130,12 +52,86 @@ def create_new_bot_utterance(orig_text, text, user, date_time, active_skill, con utt.save() return utt - @staticmethod - def update_me_object(me_obj, kwargs): - me_obj.modify(**kwargs) - me_obj.save() - @staticmethod def update_user_profile(me_user, profile): me_user.profile.update(**profile) me_user.save() + + # non batch shit + + @classmethod + def get_or_create_user(cls, user_telegram_id=Hashable, user_device_type=Any): + user_query = Human.objects(user_telegram_id__exact=user_telegram_id) + if not user_query: + user = cls.create_new_human(user_telegram_id, user_device_type) + else: + user = user_query[0] + return user + + @classmethod + def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): + if should_reset: + dialog = cls.create_new_dialog(user=user, bot=BOT, location=location, + channel_type=channel_type) + else: + exist_dialogs = Dialog.objects(user__exact=user) + if not exist_dialogs: + # TODO remove this "if" condition: it should never happen in production, only while testing + dialog = cls.create_new_dialog(user=user, bot=BOT, location=location, + channel_type=channel_type) + else: + dialog = exist_dialogs[0] + + return dialog + + @classmethod + def add_human_utterance(cls, dialog: Dialog, text: str, date_time: datetime, + annotation: Optional[dict] = None, + selected_skill: Optional[dict] = None) -> None: + utterance = cls.create_new_human_utterance(text, dialog.user, date_time, annotation, selected_skill) + dialog.utterances.append(utterance) + dialog.save() + + @classmethod + def add_bot_utterance(cls, dialog: Dialog, orig_text: str, + date_time: datetime, active_skill: str, + confidence: float, text: str = None, annotation: Optional[dict] = None) -> None: + if not text: + text = orig_text + 
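# store the selected response as a new bot utterance attached to this dialog
+        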
utterance = cls.create_new_bot_utterance(orig_text, text, dialog.bot, date_time, active_skill, confidence, + annotation) + dialog.utterances.append(utterance) + dialog.save() + + @staticmethod + def add_annotation(dialog: Dialog, payload: Dict): + dialog.utterances[-1].annotations.update(payload) + dialog.utterances[-1].save() + + @staticmethod + def add_selected_skill(dialog: Dialog, payload: Dict): + if not dialog.utterances[-1].selected_skills: + dialog.utterances[-1].selected_skills = {} + dialog.utterances[-1].selected_skills.update(payload) + dialog.utterances[-1].save() + + @staticmethod + def add_text(dialog: Dialog, payload: str): + dialog.utterances[-1].text = payload + dialog.utterances[-1].save() + + @classmethod + def add_bot_utterance_simple(cls, dialog: Dialog, payload: Dict): + active_skill_name = list(payload.values())[0] + active_skill = dialog.utterances[-1].selected_skills.get(active_skill_name, None) + if not active_skill: + raise ValueError(f'provided {payload} is not valid') + + text = active_skill['text'] + confidence = active_skill['confidence'] + + cls.add_bot_utterance(dialog, text, datetime.now(), active_skill_name, confidence) + + @staticmethod + def do_nothing(*args, **kwargs): # exclusive workaround for skill selector + pass diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 96454d3a..9951a8c0 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -1,7 +1,7 @@ -from typing import Dict, Any +from typing import Dict, Any, List -def base_input_formatter(state: Dict): +def base_input_formatter(state: List): """This state_formatter takes the most popular fields from Agent state and returns them as dict values: * last utterances: a list of last utterance from each dialog in the state * last_annotations: a list of last annotation from each last utterance @@ -22,7 +22,7 @@ def base_input_formatter(state: Dict): dialog_ids = [] user_ids = [] - for dialog in state['dialogs']: + for dialog in state: utterances_history = [] annotations_history = [] for utterance in dialog['utterances']: @@ -37,7 +37,7 @@ def base_input_formatter(state: Dict): dialog_ids.append(dialog['id']) user_ids.append(dialog['user']['id']) - return {'dialogs': state['dialogs'], + return {'dialogs': state, 'last_utterances': last_utterances, 'last_annotations': last_annotations, 'utterances_histories': utterances_histories, From c5710b6b51a9a79c79fe02bd09408adfee435d3a Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 10 Sep 2019 17:25:12 +0300 Subject: [PATCH 002/133] http api --- core/agent.py | 30 ++++++------ core/connectors.py | 14 +++++- core/run.py | 112 +++++++++++++++++++++++++++++++++++++-------- requirements.txt | 1 - 4 files changed, 121 insertions(+), 36 deletions(-) diff --git a/core/agent.py b/core/agent.py index 749ee8cc..a627873b 100644 --- a/core/agent.py +++ b/core/agent.py @@ -20,10 +20,20 @@ def __init__(self, pipeline: Pipeline, state_manager: StateManager, self.process_logger_callable = process_logger_callable self.response_logger_callable = response_logger_callable - def add_workflow_record(self, dialog: Dialog): + def add_workflow_record(self, dialog: Dialog, deadline_timestamp: Optional[float] = None, **kwargs): if dialog.id in self.workflow.keys(): raise ValueError(f'dialog with id {dialog.id} is already in workflow') - self.workflow[dialog.id] = {'dialog': dialog, 'services': defaultdict(dict)} + workflow_record = {'dialog': dialog, 'services': defaultdict(dict)} + if deadline_timestamp: + 
workflow_record['deadline_timestamp'] = deadline_timestamp + if 'dialog' in kwargs: + raise ValueError("'dialog' key is system reserved") + if 'services' in kwargs: + raise ValueError("'services key' is system reserved") + if 'deadline_timestamp' in kwargs: + raise ValueError("'deadline_timestamp' key is system reserved") + workflow_record.update(kwargs) + self.workflow[dialog.id] = workflow_record def get_workflow_record(self, dialog_id): if dialog_id not in self.workflow.keys(): @@ -35,18 +45,6 @@ def flush_record(self, dialog_id: str): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') return self.workflow.pop(dialog_id) - def save_in_workflow(self, dialog_id: str, key: str, value: Any): - if dialog_id not in self.workflow.keys(): - raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - self.workflow[dialog_id][key] = value - - def get_from_workflow(self, dialog_id: str, key: str): - if dialog_id not in self.workflow.keys(): - raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - if dialog_id not in self.workflow[dialog_id].keys(): - raise ValueError(f'Item with key {key} is not exist in dialog {dialog_id}') - return self.workflow[dialog_id][key] - def register_service_request(self, dialog_id: str, service_name): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') @@ -104,12 +102,12 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, date_time: datetime, location=Any, - channel_type=str, deadline=None): + channel_type=str, deadline_timestamp=None, **kwargs): user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) dialog = self.state_manager.get_or_create_dialog(user, location, channel_type) self.state_manager.add_human_utterance(dialog, utterance, date_time) - self.add_workflow_record(dialog) + self.add_workflow_record(dialog, deadline_timestamp, **kwargs) await self.process(dialog.id) diff --git a/core/connectors.py b/core/connectors.py index 1fa0a960..9d7ec24c 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -25,11 +25,23 @@ async def send(self, payload: Dict): return {self.name: self.formatter(response[0], mode='out')} -class CmdConnector: +class CmdOutputConnector: async def send(self, payload): print('bot: ', payload['utterances'][-1]['text']) +class HttpOutputConnector: + def __init__(self, intermediate_storage: Dict): + self.intermediate_storage = intermediate_storage + + async def send(self, payload): + message_uuid = payload['message_uuid'] + event = payload['event'] + response_text = payload['dialog'].utterances[-1].text + self.intermediate_storage[message_uuid] = response_text + event.set() + + class ConfidenceResponseSelectorConnector: """Select a single response for each dialog turn WITHOUT MAGIC METHODS. 
""" diff --git a/core/run.py b/core/run.py index 50e5f067..4c3be79d 100644 --- a/core/run.py +++ b/core/run.py @@ -1,18 +1,26 @@ import asyncio -import aiohttp +import argparse +import uuid +from aiohttp import web from datetime import datetime from core.agent import AsyncAgent from core.pipeline import Pipeline, Service, simple_workflow_formatter -from core.connectors import CmdConnector +from core.connectors import CmdOutputConnector, HttpOutputConnector from core.config_parser import services, worker_tasks, session from core.state_manager import StateManager from core.transform_config import DEBUG -endpoint = Service('cmd_responder', CmdConnector(), None, 1, ['responder'], set(), simple_workflow_formatter) +parser = argparse.ArgumentParser() +parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, + choices=['cmd_client', 'http_client'], default='cmd_client') +parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) +args = parser.parse_args() +CHANNEL = args.channel -def prepare_agent(): + +def prepare_agent(endpoint: Service): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) @@ -21,24 +29,92 @@ def prepare_agent(): return agent.register_msg -async def run(): - register_func = prepare_agent() +async def run(register_msg): user_id = input('Provide user id: ') while True: msg = input(f'You ({user_id}): ').strip() if msg: - await register_func(msg, user_id, 'cmd', datetime.now(), 'lab', 'cmd_client') + await register_msg(msg, user_id, 'cmd', datetime.now(), 'lab', CHANNEL) + + +async def init_app(register_msg, intermediate_storage): + app = web.Application() + handle_func = await api_message_processor(register_msg, intermediate_storage) + app.router.add_post('/', handle_func) + app.router.add_get('/dialogs', users_dialogs) + app.router.add_get('/dialogs/{dialog_id}', dialog) + return app + + +async def api_message_processor(register_msg, intermediate_storage): + async def api_handle(request): + result = {} + if request.method == 'POST': + if request.headers.get('content-type') != 'application/json': + raise web.HTTPBadRequest(reason='Content-Type should be application/json') + data = await request.json() + user_id = data.get('user_id') + payload = data.get('payload', '') + + if not user_id: + raise web.HTTPBadRequest(reason='user_id key is required') + + event = asyncio.Event() + message_uuid = uuid.uuid3(uuid.NAMESPACE_DNS, f'{user_id}{payload}{datetime.now()}').hex + await register_msg(utterance=payload, user_telegram_id=user_id, user_device_type='http', + date_time=datetime.now(), location='', channel_type=CHANNEL, + event=event, message_uuid=message_uuid) + await event.wait() + bot_response = intermediate_storage.pop(message_uuid) + + return web.json_response({'user_id': user_id, 'response': bot_response}) + + return api_handle + + +async def users_dialogs(request): + from core.state_schema import Dialog + exist_dialogs = Dialog.objects() + result = list() + for i in exist_dialogs: + result.append( + {'id': str(i.id), 'location': i.location, 'channel_type': i.channel_type, 'user': i.user.to_dict()}) + return web.json_response(result) + + +async def dialog(request): + from core.state_schema import Dialog + dialog_id = request.match_info['dialog_id'] + if dialog_id == 'all': + dialogs = Dialog.objects() + return web.json_response([i.to_dict() for i in dialogs]) + elif len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id): + dialog = Dialog.objects(id__exact=dialog_id) + 
if not dialog: + raise web.HTTPNotFound(reason=f'dialog with id {dialog_id} is not exist') + else: + return web.json_response(dialog[0].to_dict()) + else: + raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') if __name__ == '__main__': - loop = asyncio.get_event_loop() - loop.set_debug(DEBUG) - future = asyncio.ensure_future(run()) - try: - loop.run_until_complete(future) - except Exception as e: - raise e - finally: - loop.run_until_complete(asyncio.gather(session.close())) - loop.stop() - loop.close() + if CHANNEL == 'cmd_client': + endpoint = Service('cmd_responder', CmdOutputConnector(), None, 1, ['responder'], set(), simple_workflow_formatter) + loop = asyncio.get_event_loop() + loop.add_signal_handler(signal.SIGTERM, handler, loop) + loop.set_debug(DEBUG) + future = asyncio.ensure_future(run(prepare_agent(endpoint))) + try: + loop.run_until_complete(future) + except Exception as e: + raise e + finally: + loop.run_until_complete(asyncio.gather(session.close())) + loop.close() + elif CHANNEL == 'http_client': + intermediate_storage = {} + endpoint = Service('cmd_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) + register_msg = prepare_agent(endpoint) + app = init_app(register_msg, intermediate_storage) + web.run_app(app, port=args.port) diff --git a/requirements.txt b/requirements.txt index 79055fbf..6372c829 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ pytelegrambotapi==3.5.2 mongoengine==0.17.0 -requests[socks]==2.10.0 aiohttp==3.5.4 pyyaml \ No newline at end of file From cc67fa6b326b551d4ac23cfc082599af3cfeba0c Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Thu, 12 Sep 2019 16:37:46 +0300 Subject: [PATCH 003/133] batches, parallel (not tested properly), fixes in tasks --- core/agent.py | 36 ++++---- core/config_parser.py | 192 ++++++++++++++++++++++-------------------- core/connectors.py | 52 +++++++++--- core/run.py | 45 +++++++--- 4 files changed, 192 insertions(+), 133 deletions(-) diff --git a/core/agent.py b/core/agent.py index a627873b..2d26dac0 100644 --- a/core/agent.py +++ b/core/agent.py @@ -21,24 +21,20 @@ def __init__(self, pipeline: Pipeline, state_manager: StateManager, self.response_logger_callable = response_logger_callable def add_workflow_record(self, dialog: Dialog, deadline_timestamp: Optional[float] = None, **kwargs): - if dialog.id in self.workflow.keys(): + if str(dialog.id) in self.workflow.keys(): raise ValueError(f'dialog with id {dialog.id} is already in workflow') workflow_record = {'dialog': dialog, 'services': defaultdict(dict)} if deadline_timestamp: workflow_record['deadline_timestamp'] = deadline_timestamp - if 'dialog' in kwargs: - raise ValueError("'dialog' key is system reserved") - if 'services' in kwargs: - raise ValueError("'services key' is system reserved") - if 'deadline_timestamp' in kwargs: - raise ValueError("'deadline_timestamp' key is system reserved") + workflow_record.update(kwargs) - self.workflow[dialog.id] = workflow_record + self.workflow[str(dialog.id)] = workflow_record def get_workflow_record(self, dialog_id): - if dialog_id not in self.workflow.keys(): + record = self.workflow.get(dialog_id, None) + if not record: raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - return self.workflow[dialog_id] + return record def flush_record(self, dialog_id: str): if dialog_id not in self.workflow.keys(): @@ -109,18 +105,28 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, 
self.state_manager.add_human_utterance(dialog, utterance, date_time) self.add_workflow_record(dialog, deadline_timestamp, **kwargs) - await self.process(dialog.id) + await self.process(str(dialog.id)) async def process(self, dialog_id, service_name=None, response=None): workflow_record = self.get_workflow_record(dialog_id) next_services = self.process_service_response(dialog_id, service_name, response) + service_requests = [] + has_responder = False for service in next_services: self.register_service_request(dialog_id, service.name) payload = service.apply_workflow_formatter(workflow_record) - response = await service.connector.send(payload) + service_requests.append(service.connector.send(payload)) if service.is_responder(): - self.flush_record(dialog_id) - break + has_responder = True + + responses = await asyncio.gather(*service_requests, return_exceptions=True) + + tasks = [] + for service, response in zip(next_services, responses): if response is not None: - await self.process(dialog_id, service.name, response) + tasks.append(self.process(dialog_id, service.name, response)) + await asyncio.gather(*tasks) + + if has_responder: + self.flush_record(dialog_id) diff --git a/core/config_parser.py b/core/config_parser.py index e31dc2c7..8eb8959e 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -1,130 +1,136 @@ import aiohttp +import asyncio from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, RESPONSE_SELECTORS, POSTPROCESSORS -from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector +from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, QueueListenerBatchifyer from core.pipeline import Service, simple_workflow_formatter from core.state_manager import StateManager -services = [] -worker_tasks = [] +def parse_old_config(): + services = [] + worker_tasks = [] + session = aiohttp.ClientSession() - -def make_service_from_config_rec(log_record, session, state_processor_method, tags, names_previous_services, name_modifier=None): - worker_task = None - if name_modifier: - name = name_modifier(log_record['name']) - else: - name = log_record['name'] - formatter = log_record['formatter'] - batch_size = log_record.get('batch_size', 1) - if log_record['protocol'] == 'http': - if log_record.get('external', False): - url = f"http://{log_record['host']}:{log_record['port']}/{log_record['endpoint']}" - else: - url = f"http://{log_record['name']}:{log_record['port']}/{log_record['endpoint']}" - if batch_size == 1: - connector = HTTPConnector(session, url, formatter, log_record['name']) + def make_service_from_config_rec(log_record, session, state_processor_method, tags, names_previous_services, name_modifier=None): + worker_tasks = [] + if name_modifier: + name = name_modifier(log_record['name']) else: - pass # worker task and queue connector - - service = Service(name, connector, state_processor_method, batch_size, - tags, names_previous_services, simple_workflow_formatter) - return service, worker_task - - -def add_bot_to_name(name): - return f'bot_{name}' + name = log_record['name'] + formatter = log_record['formatter'] + batch_size = log_record.get('batch_size', 1) + url2 = log_record.get('url2') + if log_record['protocol'] == 'http': + if log_record.get('external', False): + url = f"http://{log_record['host']}:{log_record['port']}/{log_record['endpoint']}" + else: + url = f"http://{log_record['name']}:{log_record['port']}/{log_record['endpoint']}" + if batch_size == 1 and 
not url2: + connector = HTTPConnector(session, url, formatter, log_record['name']) + else: + queue = asyncio.Queue() + connector = AioQueueConnector(queue) # worker task and queue connector + worker_tasks.append(QueueListenerBatchifyer(session, url, formatter, name, queue, batch_size)) + if url2: + worker_tasks.append(QueueListenerBatchifyer(session, url2, formatter, name, queue, batch_size)) + + service = Service(name, connector, state_processor_method, batch_size, + tags, names_previous_services, simple_workflow_formatter) + + return service, worker_tasks + + def add_bot_to_name(name): + return f'bot_{name}' - -session = aiohttp.ClientSession() - -for anno in ANNOTATORS_1: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + for anno in ANNOTATORS_1: + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['ANNOTATORS_1'], set()) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) -previous_services = {i.name for i in services if 'ANNOTATORS_1' in i.tags} + previous_services = {i.name for i in services if 'ANNOTATORS_1' in i.tags} -if ANNOTATORS_2: - for anno in ANNOTATORS_2: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + if ANNOTATORS_2: + for anno in ANNOTATORS_2: + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['ANNOTATORS_2'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'ANNOTATORS_2' in i.tags} + previous_services = {i.name for i in services if 'ANNOTATORS_2' in i.tags} -if ANNOTATORS_3: - for anno in ANNOTATORS_3: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, - ['ANNOTATORS_3'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + if ANNOTATORS_3: + for anno in ANNOTATORS_3: + service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + ['ANNOTATORS_3'], previous_services) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'ANNOTATORS_3' in i.tags} + previous_services = {i.name for i in services if 'ANNOTATORS_3' in i.tags} -if SKILL_SELECTORS: - for ss in SKILL_SELECTORS: - service, worker_task = make_service_from_config_rec(ss, session, StateManager.do_nothing, + if SKILL_SELECTORS: + for ss in SKILL_SELECTORS: + service, workers = make_service_from_config_rec(ss, session, StateManager.do_nothing, ['SKILL_SELECTORS', 'selector'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'SKILL_SELECTORS' in i.tags} + previous_services = {i.name for i in services if 'SKILL_SELECTORS' in i.tags} -if SKILLS: - for s in SKILLS: - service, worker_task = make_service_from_config_rec(s, session, StateManager.add_selected_skill, + if SKILLS: + for s in SKILLS: + service, workers = make_service_from_config_rec(s, session, StateManager.add_selected_skill, ['SKILLS'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'SKILLS' in i.tags} + previous_services = 
{i.name for i in services if 'SKILLS' in i.tags} -if not RESPONSE_SELECTORS: - services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector(), - StateManager.add_bot_utterance_simple, - 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) -else: - for r in RESPONSE_SELECTORS: - service, worker_task = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple, + if not RESPONSE_SELECTORS: + services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector(), + StateManager.add_bot_utterance_simple, + 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) + else: + for r in RESPONSE_SELECTORS: + service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple, ['RESPONSE_SELECTORS'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) -previous_services = {i.name for i in services if 'RESPONSE_SELECTORS' in i.tags} + previous_services = {i.name for i in services if 'RESPONSE_SELECTORS' in i.tags} -if POSTPROCESSORS: - for p in POSTPROCESSORS: - service, worker_task = make_service_from_config_rec(p, session, StateManager.add_text, + if POSTPROCESSORS: + for p in POSTPROCESSORS: + service, workers = make_service_from_config_rec(p, session, StateManager.add_text, ['POSTPROCESSORS'], previous_services) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'POSTPROCESSORS' in i.tags} + previous_services = {i.name for i in services if 'POSTPROCESSORS' in i.tags} -if ANNOTATORS_1: - for anno in ANNOTATORS_1: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + if ANNOTATORS_1: + for anno in ANNOTATORS_1: + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_1'], previous_services, add_bot_to_name) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'POST_ANNOTATORS_1' in i.tags} + previous_services = {i.name for i in services if 'POST_ANNOTATORS_1' in i.tags} -if ANNOTATORS_2: - for anno in ANNOTATORS_2: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + if ANNOTATORS_2: + for anno in ANNOTATORS_2: + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_2'], previous_services, add_bot_to_name) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) - previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} + previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} -for anno in ANNOTATORS_3: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_3'], + for anno in ANNOTATORS_3: + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_3'], previous_services, add_bot_to_name) - services.append(service) - worker_tasks.append(worker_task) + services.append(service) + worker_tasks.extend(workers) + + return services, worker_tasks, session diff --git a/core/connectors.py b/core/connectors.py index 
9d7ec24c..7eb2c78a 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -2,27 +2,26 @@ import aiohttp from typing import Dict, List -''' -class AioQueueConnector: - def __init__(self, queue): - self.queue = queue - await self.queue.join() - - async def send(self, payload: Dict): - await self.queue.put(payload) -''' class HTTPConnector: - def __init__(self, session, url, formatter, name): + def __init__(self, session, url, formatter, service_name): self.session = session self.url = url self.formatter = formatter - self.name = name + self.service_name = service_name async def send(self, payload: Dict): async with self.session.post(self.url, json=self.formatter([payload])) as resp: response = await resp.json() - return {self.name: self.formatter(response[0], mode='out')} + return {self.service_name: self.formatter(response[0], mode='out')} + + +class AioQueueConnector: + def __init__(self, queue): + self.queue = queue + + async def send(self, payload: Dict): + await self.queue.put(payload) class CmdOutputConnector: @@ -43,10 +42,35 @@ async def send(self, payload): class ConfidenceResponseSelectorConnector: - """Select a single response for each dialog turn WITHOUT MAGIC METHODS. - """ async def send(self, payload: Dict): skill_name = '' response = payload['utterances'][-1]['selected_skills'] skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] return {'confidence_response_selector': skill_name} + + +class QueueListenerBatchifyer: + def __init__(self, session, url, formatter, service_name, queue, batch_size): + self.session = session + self.url = url + self.formatter = formatter + self.service_name = service_name + self.queue = queue + self.batch_size = batch_size + + async def call_service(self, process_callable): + while True: + batch = [] + rest = self.queue.qsize() + for i in range(min(self.batch_size, rest)): + item = await self.queue.get() + batch.append(item) + if batch: + tasks = [] + async with self.session.post(self.url, json=self.formatter(batch)) as resp: + response = await resp.json() + for dialog, response_text in zip(batch, response): + tasks.append(process_callable(dialog['id'], self.service_name, + {self.service_name: self.formatter(response_text, mode='out')})) + await asyncio.gather(*tasks) + await asyncio.sleep(0.1) diff --git a/core/run.py b/core/run.py index 4c3be79d..71f8d7e3 100644 --- a/core/run.py +++ b/core/run.py @@ -4,10 +4,11 @@ from aiohttp import web from datetime import datetime + from core.agent import AsyncAgent from core.pipeline import Pipeline, Service, simple_workflow_formatter from core.connectors import CmdOutputConnector, HttpOutputConnector -from core.config_parser import services, worker_tasks, session +from core.config_parser import parse_old_config from core.state_manager import StateManager from core.transform_config import DEBUG @@ -20,13 +21,12 @@ CHANNEL = args.channel -def prepare_agent(endpoint: Service): +def prepare_agent(services, endpoint: Service): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) - agent = AsyncAgent(pipeline, StateManager) - return agent.register_msg + return agent.register_msg, agent.process async def run(register_msg): @@ -37,15 +37,33 @@ async def run(register_msg): await register_msg(msg, user_id, 'cmd', datetime.now(), 'lab', CHANNEL) -async def init_app(register_msg, intermediate_storage): - app = web.Application() +async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown): + app = web.Application(debug=True) handle_func = 
await api_message_processor(register_msg, intermediate_storage) app.router.add_post('/', handle_func) app.router.add_get('/dialogs', users_dialogs) app.router.add_get('/dialogs/{dialog_id}', dialog) + app.on_startup.append(on_startup) + app.on_shutdown.append(on_shutdown) return app +def prepare_startup(consumers, process_callable, session): + result = [] + for i in consumers: + result.append(asyncio.ensure_future(i.call_service(process_callable))) + + async def startup_background_tasks(app): + app['consumers'] = result + app['client_session'] = session + + return startup_background_tasks + + +async def on_shutdown(app): + await app['client_session'].close() + + async def api_message_processor(register_msg, intermediate_storage): async def api_handle(request): result = {} @@ -99,12 +117,16 @@ async def dialog(request): if __name__ == '__main__': + services, workers, session = parse_old_config() + if CHANNEL == 'cmd_client': endpoint = Service('cmd_responder', CmdOutputConnector(), None, 1, ['responder'], set(), simple_workflow_formatter) loop = asyncio.get_event_loop() - loop.add_signal_handler(signal.SIGTERM, handler, loop) loop.set_debug(DEBUG) - future = asyncio.ensure_future(run(prepare_agent(endpoint))) + register_msg, process = prepare_agent(services, endpoint) + future = asyncio.ensure_future(run(register_msg)) + for i in workers: + loop.create_task(i.call_service(process)) try: loop.run_until_complete(future) except Exception as e: @@ -114,7 +136,8 @@ async def dialog(request): loop.close() elif CHANNEL == 'http_client': intermediate_storage = {} - endpoint = Service('cmd_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) - register_msg = prepare_agent(endpoint) - app = init_app(register_msg, intermediate_storage) + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) + register_msg, process_callable = prepare_agent(services, endpoint) + app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown) + web.run_app(app, port=args.port) From e5a9f8e780eacb1239a134c01a4ec4b53f4fd667 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Thu, 12 Sep 2019 17:46:57 +0300 Subject: [PATCH 004/133] some fixes, parallelisation tested --- core/config_parser.py | 2 +- core/run.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/config_parser.py b/core/config_parser.py index 8eb8959e..06540252 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -20,7 +20,7 @@ def make_service_from_config_rec(log_record, session, state_processor_method, ta name = log_record['name'] formatter = log_record['formatter'] batch_size = log_record.get('batch_size', 1) - url2 = log_record.get('url2') + url2 = log_record.get('url2', None) if log_record['protocol'] == 'http': if log_record.get('external', False): url = f"http://{log_record['host']}:{log_record['port']}/{log_record['endpoint']}" diff --git a/core/run.py b/core/run.py index 71f8d7e3..407c042c 100644 --- a/core/run.py +++ b/core/run.py @@ -4,6 +4,7 @@ from aiohttp import web from datetime import datetime +from string import hexdigits from core.agent import AsyncAgent from core.pipeline import Pipeline, Service, simple_workflow_formatter From f944407d4d1c300bbf1c66a9e2285f5a990eed53 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Thu, 12 Sep 2019 17:53:49 +0300 Subject: [PATCH 005/133] some code cleaning --- core/pipeline.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) 
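For reference, the duplicate-name check that the pipeline.py hunk below switches over to Counter can be sketched in isolation roughly as follows. This is an illustrative sketch only, not part of the patch: the _FakeService stub and the find_duplicate_names helper are assumptions made just for the example.

    from collections import Counter


    class _FakeService:
        """Illustrative stand-in for core.pipeline.Service: only the `name` attribute matters here."""

        def __init__(self, name):
            self.name = name


    def find_duplicate_names(services):
        # Counter collapses the manual defaultdict bookkeeping into one expression:
        # count every service name and keep the ones that occur more than once.
        return [name for name, count in Counter(s.name for s in services).items() if count > 1]


    if __name__ == '__main__':
        services = [_FakeService('ner'), _FakeService('sentiment'), _FakeService('ner')]
        duplicates = find_duplicate_names(services)
        if duplicates:
            raise ValueError(f'there are some duplicate service names presented {duplicates}')

Counter plays the same role as the removed defaultdict loop, which is why the commit can drop several lines without changing behaviour.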
diff --git a/core/pipeline.py b/core/pipeline.py index 3afb7294..e20051c4 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -1,4 +1,4 @@ -from collections import defaultdict +from collections import defaultdict, Counter class Service: @@ -25,17 +25,10 @@ def apply_workflow_formatter(self, workflow_record): return workflow_record return self.workflow_formatter(workflow_record) - def __repr__(self): - return self.name - class Pipeline: def __init__(self, services): - service_names = [i.name for i in services] - counter = defaultdict(int) - for i in service_names: - counter[i] += 1 - wrong_names = [k for k, v in counter.items() if v != 1] + wrong_names = [k for k, v in Counter([i.name for i in services]).items() if v != 1] if wrong_names: raise ValueError(f'there are some duplicate service names presented {wrong_names}') @@ -47,9 +40,11 @@ def __init__(self, services): def get_service_by_name(self, service_name): if not service_name: return None - if service_name not in self.services: + + service = self.services.get(service_name, None) + if not service: raise ValueError(f'service {service_name} does not exist') - return self.services[service_name] + return service def process_service_names(self): wrong_names = defaultdict(list) From 17b6fd0e3f179ecfa64dbb2ecb3f30dfaa93c15e Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Thu, 12 Sep 2019 18:58:48 +0300 Subject: [PATCH 006/133] fix: remove redundant variable assignment from ConfidenceResponseSelectorConnector --- core/connectors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/connectors.py b/core/connectors.py index 7eb2c78a..7be58463 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -43,7 +43,6 @@ async def send(self, payload): class ConfidenceResponseSelectorConnector: async def send(self, payload: Dict): - skill_name = '' response = payload['utterances'][-1]['selected_skills'] skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] return {'confidence_response_selector': skill_name} From 19c2b2442bbbc3fc01a2c683e5fd7ea540f2baec Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 13 Sep 2019 13:57:47 +0300 Subject: [PATCH 007/133] fix: clean up config parser for local agent runs --- core/config_parser.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 06540252..94227525 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -12,22 +12,19 @@ def parse_old_config(): worker_tasks = [] session = aiohttp.ClientSession() - def make_service_from_config_rec(log_record, session, state_processor_method, tags, names_previous_services, name_modifier=None): + def make_service_from_config_rec(conf_record, session, state_processor_method, tags, names_previous_services, name_modifier=None): worker_tasks = [] if name_modifier: - name = name_modifier(log_record['name']) + name = name_modifier(conf_record['name']) else: - name = log_record['name'] - formatter = log_record['formatter'] - batch_size = log_record.get('batch_size', 1) - url2 = log_record.get('url2', None) - if log_record['protocol'] == 'http': - if log_record.get('external', False): - url = f"http://{log_record['host']}:{log_record['port']}/{log_record['endpoint']}" - else: - url = f"http://{log_record['name']}:{log_record['port']}/{log_record['endpoint']}" + name = conf_record['name'] + formatter = conf_record['formatter'] + batch_size = conf_record.get('batch_size', 1) + url = conf_record['url'] + url2 = conf_record.get('url2', None) + if 
conf_record['protocol'] == 'http': if batch_size == 1 and not url2: - connector = HTTPConnector(session, url, formatter, log_record['name']) + connector = HTTPConnector(session, url, formatter, conf_record['name']) else: queue = asyncio.Queue() connector = AioQueueConnector(queue) # worker task and queue connector From 359278c1b568a0d03687560d5a992a04cd502b9f Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Sat, 14 Sep 2019 18:08:37 +0300 Subject: [PATCH 008/133] fix: dp 0.6.0 compatibility --- config.py | 8 ++++---- docker-compose.yml | 8 ++++---- state_formatters/dp_formatters.py | 28 ++++++++++++++++++---------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/config.py b/config.py index 77394bcd..74102fd6 100644 --- a/config.py +++ b/config.py @@ -15,7 +15,7 @@ "protocol": "http", "host": "127.0.0.1", "port": 2080, - "endpoint": "odqa", + "endpoint": "model", "path": "odqa/ru_odqa_infer_wiki", "env": { "CUDA_VISIBLE_DEVICES": "" @@ -45,7 +45,7 @@ "protocol": "http", "host": "127.0.0.1", "port": 2083, - "endpoint": "ner", + "endpoint": "model", "path": "ner/ner_rus", "env": { "CUDA_VISIBLE_DEVICES": "" @@ -61,7 +61,7 @@ "protocol": "http", "host": "127.0.0.1", "port": 2084, - "endpoint": "intents", + "endpoint": "model", "path": "classifiers/rusentiment_cnn", "env": { "CUDA_VISIBLE_DEVICES": "" @@ -79,7 +79,7 @@ "protocol": "http", "host": "127.0.0.1", "port": 2082, - "endpoint": "intents", + "endpoint": "model", "path": "classifiers/rusentiment_bigru_superconv", "env": { "CUDA_VISIBLE_DEVICES": "" diff --git a/docker-compose.yml b/docker-compose.yml index f7e07518..ab261150 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -39,7 +39,7 @@ services: chitchat_odqa: build: args: - skill_endpoint: intents + skill_endpoint: model skillconfig: classifiers/rusentiment_bigru_superconv skillhost: 0.0.0.0 skillport: 2082 @@ -66,7 +66,7 @@ services: ner: build: args: - skill_endpoint: ner + skill_endpoint: model skillconfig: ner/ner_rus skillhost: 0.0.0.0 skillport: 2083 @@ -85,7 +85,7 @@ services: odqa: build: args: - skill_endpoint: odqa + skill_endpoint: model skillconfig: odqa/ru_odqa_infer_wiki skillhost: 0.0.0.0 skillport: 2080 @@ -104,7 +104,7 @@ services: sentiment: build: args: - skill_endpoint: intents + skill_endpoint: model skillconfig: classifiers/rusentiment_cnn skillhost: 0.0.0.0 skillport: 2084 diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 9951a8c0..a41fab29 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -1,7 +1,7 @@ -from typing import Dict, Any, List +from typing import Dict, Any -def base_input_formatter(state: List): +def base_input_formatter(state: Dict): """This state_formatter takes the most popular fields from Agent state and returns them as dict values: * last utterances: a list of last utterance from each dialog in the state * last_annotations: a list of last annotation from each last utterance @@ -64,14 +64,14 @@ def base_skill_output_formatter(payload): "confidence": payload[1]} -def base_annotator_formatter(payload: Any, model_args_names=('context',), mode='in'): +def base_annotator_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': return payload -def ner_formatter(payload: Any, model_args_names=('context',), mode='in'): +def ner_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif 
mode == 'out': @@ -79,14 +79,14 @@ def ner_formatter(payload: Any, model_args_names=('context',), mode='in'): 'tags': payload[1]} -def sentiment_formatter(payload: Any, model_args_names=('context',), mode='in'): +def sentiment_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': return [el[0] for el in payload] -def chitchat_odqa_formatter(payload: Any, model_args_names=('context',), mode='in'): +def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': @@ -100,7 +100,7 @@ def chitchat_odqa_formatter(payload: Any, model_args_names=('context',), mode='i return response -def odqa_formatter(payload: Any, model_args_names=('context',), mode='in'): +def odqa_formatter(payload: Any, model_args_names=('question_raw',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': @@ -108,9 +108,17 @@ def odqa_formatter(payload: Any, model_args_names=('context',), mode='in'): "confidence": 0.5} -def chitchat_formatter(payload: Any, - model_args_names=("utterances", 'annotations', 'u_histories', 'dialogs'), - mode='in'): +def chitchat_formatter(payload: Any, model_args_names=('q',), mode='in'): + if mode == 'in': + return last_utterances(payload, model_args_names) + elif mode == 'out': + return {"text": payload[0], + "confidence": 0.5} + + +def chitchat_example_formatter(payload: Any, + model_args_names=("utterances", 'annotations', 'u_histories', 'dialogs'), + mode='in'): if mode == 'in': parsed = base_input_formatter(payload) return {model_args_names[0]: parsed['last_utterances'], From d9f1a64b443d673dc44b1104e6702fe74b56baf6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Sat, 14 Sep 2019 19:49:32 +0300 Subject: [PATCH 009/133] refactor: rename AsyncAgent --> Agent --- core/agent.py | 2 +- core/run.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/agent.py b/core/agent.py index 2d26dac0..c00db537 100644 --- a/core/agent.py +++ b/core/agent.py @@ -10,7 +10,7 @@ from core.state_schema import Dialog -class AsyncAgent: +class Agent: def __init__(self, pipeline: Pipeline, state_manager: StateManager, process_logger_callable: Optional[Callable] = None, response_logger_callable: Optional[Callable] = None): diff --git a/core/run.py b/core/run.py index 407c042c..770e7ab0 100644 --- a/core/run.py +++ b/core/run.py @@ -6,7 +6,7 @@ from datetime import datetime from string import hexdigits -from core.agent import AsyncAgent +from core.agent import Agent from core.pipeline import Pipeline, Service, simple_workflow_formatter from core.connectors import CmdOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config @@ -25,7 +25,7 @@ def prepare_agent(services, endpoint: Service): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) - agent = AsyncAgent(pipeline, StateManager) + agent = Agent(pipeline, StateManager) return agent.register_msg, agent.process From 264ba30652b2688317a81f4b0af9ce255f3c0edd Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Sat, 14 Sep 2019 22:51:52 +0300 Subject: [PATCH 010/133] fix: base_formatter input formatters in out mode --- state_formatters/dp_formatters.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index a41fab29..25b78f00 100644 --- 
a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -1,7 +1,7 @@ -from typing import Dict, Any +from typing import List, Any -def base_input_formatter(state: Dict): +def base_input_formatter(state: List): """This state_formatter takes the most popular fields from Agent state and returns them as dict values: * last utterances: a list of last utterance from each dialog in the state * last_annotations: a list of last annotation from each last utterance @@ -16,7 +16,7 @@ def base_input_formatter(state: Dict): """ utterances_histories = [] - last_utterances = [] + last_utts = [] annotations_histories = [] last_annotations = [] dialog_ids = [] @@ -29,7 +29,7 @@ def base_input_formatter(state: Dict): utterances_history.append(utterance['text']) annotations_history.append(utterance['annotations']) - last_utterances.append(utterances_history[-1]) + last_utts.append(utterances_history[-1]) utterances_histories.append(utterances_history) last_annotations.append(annotations_history[-1]) annotations_histories.append(annotations_history) @@ -38,7 +38,7 @@ def base_input_formatter(state: Dict): user_ids.append(dialog['user']['id']) return {'dialogs': state, - 'last_utterances': last_utterances, + 'last_utterances': last_utts, 'last_annotations': last_annotations, 'utterances_histories': utterances_histories, 'annotation_histories': annotations_histories, @@ -83,7 +83,7 @@ def sentiment_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': - return [el[0] for el in payload] + return [el for el in payload] def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): @@ -92,7 +92,7 @@ def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): elif mode == 'out': response = [] for el in payload: - class_name = el[0][0] + class_name = el[0] if class_name in ['speech', 'negative']: response.append('chitchat') else: From 7c7b90b154daba4dce4ad8af462e5d5dd773feeb Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Sat, 14 Sep 2019 22:52:20 +0300 Subject: [PATCH 011/133] style: fix minor issues --- core/agent.py | 2 +- core/run.py | 41 +++++++++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/core/agent.py b/core/agent.py index c00db537..7309d8e5 100644 --- a/core/agent.py +++ b/core/agent.py @@ -73,7 +73,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res service = self.pipeline.get_service_by_name(service_name) if service: self.workflow[dialog_id]['services'][service_name]['done'] = time() - if response: + if response and service.state_processor_method: service.state_processor_method(workflow_record['dialog'], response) # Calculating next steps diff --git a/core/run.py b/core/run.py index 770e7ab0..aea0c387 100644 --- a/core/run.py +++ b/core/run.py @@ -13,7 +13,6 @@ from core.state_manager import StateManager from core.transform_config import DEBUG - parser = argparse.ArgumentParser() parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, choices=['cmd_client', 'http_client'], default='cmd_client') @@ -25,7 +24,7 @@ def prepare_agent(services, endpoint: Service): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) - agent = Agent(pipeline, StateManager) + agent = Agent(pipeline, StateManager()) return agent.register_msg, agent.process @@ -38,14 +37,18 @@ async def run(register_msg): await 
register_msg(msg, user_id, 'cmd', datetime.now(), 'lab', CHANNEL) -async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown): +async def on_shutdown(app): + await app['client_session'].close() + + +async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown_func=on_shutdown): app = web.Application(debug=True) handle_func = await api_message_processor(register_msg, intermediate_storage) app.router.add_post('/', handle_func) app.router.add_get('/dialogs', users_dialogs) app.router.add_get('/dialogs/{dialog_id}', dialog) app.on_startup.append(on_startup) - app.on_shutdown.append(on_shutdown) + app.on_shutdown.append(on_shutdown_func) return app @@ -61,13 +64,10 @@ async def startup_background_tasks(app): return startup_background_tasks -async def on_shutdown(app): - await app['client_session'].close() - - async def api_message_processor(register_msg, intermediate_storage): async def api_handle(request): - result = {} + user_id = None + bot_response = None if request.method == 'POST': if request.headers.get('content-type') != 'application/json': raise web.HTTPBadRequest(reason='Content-Type should be application/json') @@ -86,12 +86,15 @@ async def api_handle(request): await event.wait() bot_response = intermediate_storage.pop(message_uuid) + if bot_response is None: + raise RuntimeError('Got None instead of a bot response.') + return web.json_response({'user_id': user_id, 'response': bot_response}) return api_handle -async def users_dialogs(request): +async def users_dialogs(): from core.state_schema import Dialog exist_dialogs = Dialog.objects() result = list() @@ -108,20 +111,21 @@ async def dialog(request): dialogs = Dialog.objects() return web.json_response([i.to_dict() for i in dialogs]) elif len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id): - dialog = Dialog.objects(id__exact=dialog_id) - if not dialog: + d = Dialog.objects(id__exact=dialog_id) + if not d: raise web.HTTPNotFound(reason=f'dialog with id {dialog_id} is not exist') else: - return web.json_response(dialog[0].to_dict()) + return web.json_response(d[0].to_dict()) else: raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') -if __name__ == '__main__': +def main(): services, workers, session = parse_old_config() if CHANNEL == 'cmd_client': - endpoint = Service('cmd_responder', CmdOutputConnector(), None, 1, ['responder'], set(), simple_workflow_formatter) + endpoint = Service('cmd_responder', CmdOutputConnector(), None, 1, ['responder'], set(), + simple_workflow_formatter) loop = asyncio.get_event_loop() loop.set_debug(DEBUG) register_msg, process = prepare_agent(services, endpoint) @@ -139,6 +143,11 @@ async def dialog(request): intermediate_storage = {} endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) register_msg, process_callable = prepare_agent(services, endpoint) - app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown) + app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), + on_shutdown) web.run_app(app, port=args.port) + + +if __name__ == '__main__': + main() From 5df9b89267f65ef758dce992288f0f63c5cf679c Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Sat, 14 Sep 2019 22:52:56 +0300 Subject: [PATCH 012/133] refactor: rewrite agent batch test to new agent architecture --- utils/agent_batch_test.py | 97 ++++++++++++++++++--------------------- 1 file changed, 44 
insertions(+), 53 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index 38f6de7a..8f334195 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -1,68 +1,53 @@ from datetime import datetime import uuid import argparse -import os from random import choice +import asyncio + from core.agent import Agent from core.state_manager import StateManager -from core.skill_manager import SkillManager -from core.rest_caller import RestCaller -from models.postprocessor import DefaultPostprocessor -from models.response_selector import ConfidenceResponseSelector -from core.transform_config import MAX_WORKERS, ANNOTATORS, SKILL_SELECTORS, SKILLS, RESPONSE_SELECTORS +from core.pipeline import Pipeline, Service +from core.config_parser import parse_old_config +from core.connectors import HttpOutputConnector import logging logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING) -os.environ["DPA_LAUNCHING_ENV"] = 'local' - parser = argparse.ArgumentParser() parser.add_argument('phrasefile', help='name of the file with phrases for dialog', type=str, default="../utils/ru_test_phrases.txt") -def init_agent(): - state_manager = StateManager() - - preprocessors = [] - for ants in ANNOTATORS: - if ants: - anno_names, anno_urls, anno_formatters = zip( - *[(a['name'], a['url'], a['formatter']) for a in ants]) - else: - anno_names, anno_urls, anno_formatters = [], [], [] - preprocessors.append(RestCaller(max_workers=MAX_WORKERS, names=anno_names, urls=anno_urls, - formatters=anno_formatters)) - postprocessor = DefaultPostprocessor() - skill_caller = RestCaller(max_workers=MAX_WORKERS) - - if RESPONSE_SELECTORS: - rs_names, rs_urls, rs_formatters = zip( - *[(rs['name'], rs['url'], rs['formatter']) for rs in RESPONSE_SELECTORS]) - response_selector = RestCaller(max_workers=MAX_WORKERS, names=rs_names, urls=rs_urls, - formatters=rs_formatters) - else: - response_selector = ConfidenceResponseSelector() - - skill_selector = None - if SKILL_SELECTORS: - ss_names, ss_urls, ss_formatters = zip( - *[(ss['name'], ss['url'], ss['formatter']) for ss in SKILL_SELECTORS]) - skill_selector = RestCaller(max_workers=MAX_WORKERS, names=ss_names, urls=ss_urls, - formatters=ss_formatters) - - skill_manager = SkillManager(skill_selector=skill_selector, response_selector=response_selector, - skill_caller=skill_caller, - profile_handlers=[skill['name'] for skill in SKILLS - if skill.get('profile_handler')]) - - agent = Agent(state_manager, preprocessors, postprocessor, skill_manager) - return agent - - -def main(): +def init_agent(intermediate_storage): + services, workers, session = parse_old_config() + pipeline = Pipeline(services) + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) + pipeline.add_responder_service(endpoint) + agent = Agent(pipeline, StateManager()) + return agent, session + + +class DummyOutputConnector: + def __init__(self, intermediate_storage): + self.intermediate_storage = intermediate_storage + + async def send(self, payload): + self.intermediate_storage[payload['message_uuid']] = payload + payload['event'].set() + + +async def process_message_return_event(agent, phrase, u_tg_id, u_d_type, date_time, location, ch_type, intermediate_storage): + event = asyncio.Event() + message_uuid = uuid.uuid4().hex + await agent.register_msg(utterance=phrase, user_telegram_id=u_tg_id, user_device_type=u_d_type, date_time=date_time, + location=location, channel_type=ch_type, event=event, 
message_uuid=message_uuid) + await event.wait() + return intermediate_storage.pop(message_uuid) + + +async def main(): args = parser.parse_args() with open(args.phrasefile, 'r') as file: phrases = [line.rstrip('\n') for line in file] @@ -73,13 +58,19 @@ def main(): date_times = [datetime.utcnow()] * length locations = [choice(['moscow', 'novosibirsk', 'novokuznetsk']) for _ in range(length)] ch_types = ['cmd_client'] * length + intermediate_storage = {} + agent, session = init_agent(intermediate_storage) + result = [] + for u, u_tg_id, u_d_type, dt, loc, ch_t in zip(phrases, u_tg_ids, u_d_types, date_times, locations, ch_types): + response = await process_message_return_event(agent, u, u_tg_id, u_d_type, dt, loc, ch_t, intermediate_storage) + result.append(response) - agent = init_agent() + await session.close() - responses = agent(utterances=phrases, user_telegram_ids=u_tg_ids, user_device_types=u_d_types, - date_times=date_times, locations=locations, channel_types=ch_types) - return responses + return result if __name__ == "__main__": - print(main()) + loop = asyncio.get_event_loop() + result = loop.run_until_complete(main()) + print(result) From 393d710923454441a17daad19a585db8e30176f5 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Mon, 16 Sep 2019 15:36:48 +0300 Subject: [PATCH 013/133] add "require-return" param for register_msg method. Fix tests, connectors are added --- core/agent.py | 23 +++++++++++++---- core/connectors.py | 53 +++++++++++++++++++++------------------ utils/agent_batch_test.py | 30 +++++----------------- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/core/agent.py b/core/agent.py index 7309d8e5..123d2546 100644 --- a/core/agent.py +++ b/core/agent.py @@ -98,15 +98,28 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, date_time: datetime, location=Any, - channel_type=str, deadline_timestamp=None, **kwargs): - + channel_type=str, deadline_timestamp=None, + require_response = False, **kwargs): + event = None + message_uuid = None + hold_flush = False user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) dialog = self.state_manager.get_or_create_dialog(user, location, channel_type) self.state_manager.add_human_utterance(dialog, utterance, date_time) - self.add_workflow_record(dialog, deadline_timestamp, **kwargs) - + if require_response: + event = asyncio.Event() + hold_flush = True + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, + event=event, hold_flush=hold_flush, **kwargs) await self.process(str(dialog.id)) + if require_response: + await event.wait() + workflow_record = self.get_workflow_record(str(dialog.id)) + self.flush_record(str(dialog.id)) + return workflow_record + + async def process(self, dialog_id, service_name=None, response=None): workflow_record = self.get_workflow_record(dialog_id) next_services = self.process_service_response(dialog_id, service_name, response) @@ -128,5 +141,5 @@ async def process(self, dialog_id, service_name=None, response=None): tasks.append(self.process(dialog_id, service.name, response)) await asyncio.gather(*tasks) - if has_responder: + if has_responder and not workflow_record.get('hold_flush', False): self.flush_record(dialog_id) diff --git a/core/connectors.py b/core/connectors.py index 7be58463..9c3199e2 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -24,30 +24,6 @@ async def send(self, payload: Dict): await 
self.queue.put(payload) -class CmdOutputConnector: - async def send(self, payload): - print('bot: ', payload['utterances'][-1]['text']) - - -class HttpOutputConnector: - def __init__(self, intermediate_storage: Dict): - self.intermediate_storage = intermediate_storage - - async def send(self, payload): - message_uuid = payload['message_uuid'] - event = payload['event'] - response_text = payload['dialog'].utterances[-1].text - self.intermediate_storage[message_uuid] = response_text - event.set() - - -class ConfidenceResponseSelectorConnector: - async def send(self, payload: Dict): - response = payload['utterances'][-1]['selected_skills'] - skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] - return {'confidence_response_selector': skill_name} - - class QueueListenerBatchifyer: def __init__(self, session, url, formatter, service_name, queue, batch_size): self.session = session @@ -73,3 +49,32 @@ async def call_service(self, process_callable): {self.service_name: self.formatter(response_text, mode='out')})) await asyncio.gather(*tasks) await asyncio.sleep(0.1) + + +class ConfidenceResponseSelectorConnector: + async def send(self, payload: Dict): + response = payload['utterances'][-1]['selected_skills'] + skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] + return {'confidence_response_selector': skill_name} + + +class CmdOutputConnector: + async def send(self, payload): + print('bot: ', payload['utterances'][-1]['text']) + + +class HttpOutputConnector: + def __init__(self, intermediate_storage: Dict): + self.intermediate_storage = intermediate_storage + + async def send(self, payload): + message_uuid = payload['message_uuid'] + event = payload['event'] + response_text = payload['dialog'].utterances[-1].text + self.intermediate_storage[message_uuid] = response_text + event.set() + + +class EventSetOutputConnector: + async def send(self, payload): + payload['event'].set() diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index 8f334195..9716e2d3 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -9,7 +9,7 @@ from core.state_manager import StateManager from core.pipeline import Pipeline, Service from core.config_parser import parse_old_config -from core.connectors import HttpOutputConnector +from core.connectors import EventSetOutputConnector import logging @@ -20,33 +20,15 @@ default="../utils/ru_test_phrases.txt") -def init_agent(intermediate_storage): +def init_agent(): services, workers, session = parse_old_config() pipeline = Pipeline(services) - endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) + endpoint = Service('http_responder', EventSetOutputConnector(), None, 1, ['responder']) pipeline.add_responder_service(endpoint) agent = Agent(pipeline, StateManager()) return agent, session -class DummyOutputConnector: - def __init__(self, intermediate_storage): - self.intermediate_storage = intermediate_storage - - async def send(self, payload): - self.intermediate_storage[payload['message_uuid']] = payload - payload['event'].set() - - -async def process_message_return_event(agent, phrase, u_tg_id, u_d_type, date_time, location, ch_type, intermediate_storage): - event = asyncio.Event() - message_uuid = uuid.uuid4().hex - await agent.register_msg(utterance=phrase, user_telegram_id=u_tg_id, user_device_type=u_d_type, date_time=date_time, - location=location, channel_type=ch_type, event=event, message_uuid=message_uuid) - 
await event.wait() - return intermediate_storage.pop(message_uuid) - - async def main(): args = parser.parse_args() with open(args.phrasefile, 'r') as file: @@ -59,11 +41,11 @@ async def main(): locations = [choice(['moscow', 'novosibirsk', 'novokuznetsk']) for _ in range(length)] ch_types = ['cmd_client'] * length intermediate_storage = {} - agent, session = init_agent(intermediate_storage) + agent, session = init_agent() result = [] for u, u_tg_id, u_d_type, dt, loc, ch_t in zip(phrases, u_tg_ids, u_d_types, date_times, locations, ch_types): - response = await process_message_return_event(agent, u, u_tg_id, u_d_type, dt, loc, ch_t, intermediate_storage) - result.append(response) + response = await agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True) + result.append(response['dialog'].utterances[-1].text) await session.close() From 419b96e766781e9b59750c0e8b1011ce8c7a0413 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 16 Sep 2019 15:44:42 +0300 Subject: [PATCH 014/133] fix: remove unused variable, rename variable --- utils/agent_batch_test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index 9716e2d3..47d6b168 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -40,16 +40,15 @@ async def main(): date_times = [datetime.utcnow()] * length locations = [choice(['moscow', 'novosibirsk', 'novokuznetsk']) for _ in range(length)] ch_types = ['cmd_client'] * length - intermediate_storage = {} agent, session = init_agent() - result = [] + res = [] for u, u_tg_id, u_d_type, dt, loc, ch_t in zip(phrases, u_tg_ids, u_d_types, date_times, locations, ch_types): response = await agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True) - result.append(response['dialog'].utterances[-1].text) + res.append(response['dialog'].utterances[-1].text) await session.close() - return result + return res if __name__ == "__main__": From 4b65bb47c68cc72ffcffdda90c514338a086b7f2 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 17 Sep 2019 11:51:48 +0300 Subject: [PATCH 015/133] cmd connector refactoring, exceptions --- core/agent.py | 3 ++- core/connectors.py | 14 ++++++-------- core/run.py | 22 +++++++++++++++------- requirements.txt | 3 ++- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/core/agent.py b/core/agent.py index 123d2546..51aca7cf 100644 --- a/core/agent.py +++ b/core/agent.py @@ -112,7 +112,6 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, event=event, hold_flush=hold_flush, **kwargs) await self.process(str(dialog.id)) - if require_response: await event.wait() workflow_record = self.get_workflow_record(str(dialog.id)) @@ -138,6 +137,8 @@ async def process(self, dialog_id, service_name=None, response=None): tasks = [] for service, response in zip(next_services, responses): if response is not None: + if isinstance(response, Exception): + raise response tasks.append(self.process(dialog_id, service.name, response)) await asyncio.gather(*tasks) diff --git a/core/connectors.py b/core/connectors.py index 9c3199e2..fccbafe5 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -1,10 +1,10 @@ import asyncio import aiohttp -from typing import Dict, List +from typing import Dict, List, Callable class HTTPConnector: - def __init__(self, session, url, formatter, service_name): + def __init__(self, session: aiohttp.ClientSession, url: str, 
formatter: Callable, service_name: str): self.session = session self.url = url self.formatter = formatter @@ -58,11 +58,6 @@ async def send(self, payload: Dict): return {'confidence_response_selector': skill_name} -class CmdOutputConnector: - async def send(self, payload): - print('bot: ', payload['utterances'][-1]['text']) - - class HttpOutputConnector: def __init__(self, intermediate_storage: Dict): self.intermediate_storage = intermediate_storage @@ -77,4 +72,7 @@ async def send(self, payload): class EventSetOutputConnector: async def send(self, payload): - payload['event'].set() + event = payload.get('event', None) + if not event or not isinstance(event, asyncio.Event): + raise ValueError("'event' key is not presented in payload") + event.set() diff --git a/core/run.py b/core/run.py index aea0c387..db8072dd 100644 --- a/core/run.py +++ b/core/run.py @@ -5,10 +5,11 @@ from aiohttp import web from datetime import datetime from string import hexdigits +from aiohttp_swagger import * from core.agent import Agent from core.pipeline import Pipeline, Service, simple_workflow_formatter -from core.connectors import CmdOutputConnector, HttpOutputConnector +from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager from core.transform_config import DEBUG @@ -17,6 +18,7 @@ parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, choices=['cmd_client', 'http_client'], default='cmd_client') parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) +parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true') args = parser.parse_args() CHANNEL = args.channel @@ -34,8 +36,10 @@ async def run(register_msg): while True: msg = input(f'You ({user_id}): ').strip() if msg: - await register_msg(msg, user_id, 'cmd', datetime.now(), 'lab', CHANNEL) - + response = await register_msg(utterance=msg, user_telegram_id=user_id, user_device_type='cmd', + date_time=datetime.now(), location='lab', channel_type=CHANNEL, + deadline_timestamp=None, require_response=True) + print('Bot: ', response['dialog'].utterances[-1].text) async def on_shutdown(app): await app['client_session'].close() @@ -47,6 +51,7 @@ async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown_f app.router.add_post('/', handle_func) app.router.add_get('/dialogs', users_dialogs) app.router.add_get('/dialogs/{dialog_id}', dialog) + setup_swagger(app, swagger_url='/docs') app.on_startup.append(on_startup) app.on_shutdown.append(on_shutdown_func) return app @@ -124,20 +129,23 @@ def main(): services, workers, session = parse_old_config() if CHANNEL == 'cmd_client': - endpoint = Service('cmd_responder', CmdOutputConnector(), None, 1, ['responder'], set(), - simple_workflow_formatter) + endpoint = Service('cmd_responder', EventSetOutputConnector(), None, 1, ['responder'], set()) loop = asyncio.get_event_loop() - loop.set_debug(DEBUG) + loop.set_debug(args.debug) register_msg, process = prepare_agent(services, endpoint) future = asyncio.ensure_future(run(register_msg)) for i in workers: loop.create_task(i.call_service(process)) try: loop.run_until_complete(future) + except KeyboardInterrupt: + pass except Exception as e: raise e finally: - loop.run_until_complete(asyncio.gather(session.close())) + future.cancel() + loop.run_until_complete(session.close()) + loop.stop() loop.close() elif CHANNEL == 'http_client': 
intermediate_storage = {} diff --git a/requirements.txt b/requirements.txt index 6372c829..0532cde3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ pytelegrambotapi==3.5.2 mongoengine==0.17.0 aiohttp==3.5.4 -pyyaml \ No newline at end of file +aiohttp-swagger==1.0.9 +pyyaml From 2a58f1da5fba813290028029e0f61c75bd880d7c Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 17 Sep 2019 11:57:14 +0300 Subject: [PATCH 016/133] some fixes --- core/config_parser.py | 2 +- core/state_manager.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 94227525..fd9a75bf 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -59,7 +59,7 @@ def add_bot_to_name(name): if ANNOTATORS_3: for anno in ANNOTATORS_3: - service, worker_task = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['ANNOTATORS_3'], previous_services) services.append(service) worker_tasks.extend(workers) diff --git a/core/state_manager.py b/core/state_manager.py index 5a132c70..15b11cbb 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -1,10 +1,8 @@ from datetime import datetime -from typing import Sequence, Hashable, Any, Optional, Dict +from typing import Hashable, Any, Optional, Dict -from core.state_schema import Human, Bot, Utterance, HumanUtterance, BotUtterance, Dialog -from core.connection import state_storage from core.bot import BOT -from core import VERSION +from core.state_schema import Human, Bot, HumanUtterance, BotUtterance, Dialog class StateManager: From 4e853099ca5c7f9d1bfcb77a22cb1110879152ce Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 17 Sep 2019 12:39:07 +0300 Subject: [PATCH 017/133] Service class: add connector_callable input parameter --- core/agent.py | 2 +- core/pipeline.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/core/agent.py b/core/agent.py index 51aca7cf..6b2a3961 100644 --- a/core/agent.py +++ b/core/agent.py @@ -128,7 +128,7 @@ async def process(self, dialog_id, service_name=None, response=None): for service in next_services: self.register_service_request(dialog_id, service.name) payload = service.apply_workflow_formatter(workflow_record) - service_requests.append(service.connector.send(payload)) + service_requests.append(service.connector_callable(payload)) if service.is_responder(): has_responder = True diff --git a/core/pipeline.py b/core/pipeline.py index e20051c4..419956d2 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -2,15 +2,19 @@ class Service: - def __init__(self, name, connector, state_processor_method, - batch_size=1, tags=None, names_previous_services=None, workflow_formatter=None): + def __init__(self, name, connector=None, state_processor_method=None, + batch_size=1, tags=None, names_previous_services=None, + workflow_formatter=None, connector_callable=None): self.name = name self.batch_size = batch_size - self.connector = connector self.state_processor_method = state_processor_method self.names_previous_services = names_previous_services or set() self.tags = tags or [] self.workflow_formatter = workflow_formatter + if not (connector or connector_callable): + raise ValueError('Either connector or connector_callable should be provided') + self.connector = connector + self._connector_callable = connector_callable self.previous_services = set() self.next_services = set() @@ -25,6 +29,13 @@ def 
apply_workflow_formatter(self, workflow_record): return workflow_record return self.workflow_formatter(workflow_record) + @property + def connector_callable(self): + if self._connector_callable: + return self._connector_callable + else: + return self.connector.send + class Pipeline: def __init__(self, services): From b6dde2945458f3d0c5f93a99db6c986d5132185d Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 17 Sep 2019 14:20:42 +0300 Subject: [PATCH 018/133] agent_batch_test now sends requests in parallel --- utils/agent_batch_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index 47d6b168..0fe21bd5 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -35,20 +35,23 @@ async def main(): phrases = [line.rstrip('\n') for line in file] length = len(phrases) - u_tg_ids = [str(uuid.uuid4())] * length u_d_types = [choice(['iphone', 'android']) for _ in range(length)] date_times = [datetime.utcnow()] * length locations = [choice(['moscow', 'novosibirsk', 'novokuznetsk']) for _ in range(length)] ch_types = ['cmd_client'] * length agent, session = init_agent() - res = [] - for u, u_tg_id, u_d_type, dt, loc, ch_t in zip(phrases, u_tg_ids, u_d_types, date_times, locations, ch_types): - response = await agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True) - res.append(response['dialog'].utterances[-1].text) + tasks = [] + for u, u_d_type, dt, loc, ch_t in zip(phrases, u_d_types, date_times, locations, ch_types): + u_tg_id = uuid.uuid4().hex + tasks.append(agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True)) + res = await asyncio.gather(*tasks, return_exceptions=True) + for i in res: + if isinstance(i, Exception): + raise i await session.close() - return res + return [i['dialog'].utterances[-1].text for i in res] if __name__ == "__main__": From e7fbc08c0573a17e13f7e86a3dde078290e9cd75 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 17 Sep 2019 14:38:00 +0300 Subject: [PATCH 019/133] fix: remove unnecessary cycle --- utils/agent_batch_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index 0fe21bd5..f11fb458 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -44,10 +44,7 @@ async def main(): for u, u_d_type, dt, loc, ch_t in zip(phrases, u_d_types, date_times, locations, ch_types): u_tg_id = uuid.uuid4().hex tasks.append(agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True)) - res = await asyncio.gather(*tasks, return_exceptions=True) - for i in res: - if isinstance(i, Exception): - raise i + res = await asyncio.gather(*tasks, return_exceptions=False) await session.close() From 30fbfefa921bd42b38991cc51f713e2891ac8cc6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 17 Sep 2019 15:36:51 +0300 Subject: [PATCH 020/133] doc: add pyyaml version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0532cde3..6d470c25 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ pytelegrambotapi==3.5.2 mongoengine==0.17.0 aiohttp==3.5.4 aiohttp-swagger==1.0.9 -pyyaml +pyyaml==5.1 From 4f5998af643010005de9cbe16b23050027df1f06 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 17 Sep 2019 16:34:39 +0300 Subject: [PATCH 021/133] feat: reformat state schema feat: human and bot under utterance fix: remove redundant print fix: base formatter fix: 
remove redundant todo --- core/__init__.py | 2 +- core/agent.py | 10 ++-- core/bot.py | 8 --- core/config_parser.py | 4 +- core/state_manager.py | 86 +++++++++++++++++++++---------- core/state_schema.py | 32 ++++++------ models/hardcode_utterances.py | 5 ++ state_formatters/dp_formatters.py | 2 +- 8 files changed, 90 insertions(+), 59 deletions(-) delete mode 100644 core/bot.py diff --git a/core/__init__.py b/core/__init__.py index b5ce99b5..89e51f14 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1 +1 @@ -VERSION = "0.11.1" +STATE_API_VERSION = "0.12" diff --git a/core/agent.py b/core/agent.py index 6b2a3961..ef303fb0 100644 --- a/core/agent.py +++ b/core/agent.py @@ -8,6 +8,7 @@ from core.pipeline import Pipeline from core.state_manager import StateManager from core.state_schema import Dialog +from models.hardcode_utterances import TG_START_UTT class Agent: @@ -99,13 +100,13 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, date_time: datetime, location=Any, channel_type=str, deadline_timestamp=None, - require_response = False, **kwargs): + require_response=False, **kwargs): event = None - message_uuid = None hold_flush = False user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) - dialog = self.state_manager.get_or_create_dialog(user, location, channel_type) - self.state_manager.add_human_utterance(dialog, utterance, date_time) + should_reset = True if utterance == TG_START_UTT else False + dialog = self.state_manager.get_or_create_dialog(user, location, channel_type, should_reset=should_reset) + self.state_manager.add_human_utterance(dialog, user, utterance, date_time) if require_response: event = asyncio.Event() hold_flush = True @@ -118,7 +119,6 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, self.flush_record(str(dialog.id)) return workflow_record - async def process(self, dialog_id, service_name=None, response=None): workflow_record = self.get_workflow_record(dialog_id) next_services = self.process_service_response(dialog_id, service_name, response) diff --git a/core/bot.py b/core/bot.py deleted file mode 100644 index 049c9c35..00000000 --- a/core/bot.py +++ /dev/null @@ -1,8 +0,0 @@ -from core.state_schema import Bot -from core.connection import connect - -try: - BOT = Bot.objects(id__exact='5c7cf00e5c70e839bf9cb115')[0] -except IndexError: - BOT = Bot(id='5c7cf00e5c70e839bf9cb115') - BOT.save() diff --git a/core/config_parser.py b/core/config_parser.py index fd9a75bf..4a636d91 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -86,11 +86,11 @@ def add_bot_to_name(name): if not RESPONSE_SELECTORS: services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector(), - StateManager.add_bot_utterance_simple, + StateManager.add_bot_response, 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) else: for r in RESPONSE_SELECTORS: - service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple, + service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_response, ['RESPONSE_SELECTORS'], previous_services) services.append(service) worker_tasks.extend(workers) diff --git a/core/state_manager.py b/core/state_manager.py index 15b11cbb..c1e07663 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -1,17 +1,17 @@ from datetime import datetime -from typing import Hashable, Any, Optional, Dict +from typing import Hashable, Any, Optional, Dict, TypeVar, 
List -from core.bot import BOT -from core.state_schema import Human, Bot, HumanUtterance, BotUtterance, Dialog +from core.state_schema import User, Human, Bot, HumanUtterance, BotUtterance, Dialog +from core.connection import connect + +userT = TypeVar('userT', bound=User) class StateManager: @staticmethod - def create_new_dialog(user, bot, location=None, channel_type=None): - dialog = Dialog(user=user, - bot=bot, - location=location or Dialog.location.default, + def create_new_dialog(human_id, location=None, channel_type=None): + dialog = Dialog(human_id=human_id, location=location or Dialog.location.default, channel_type=channel_type) dialog.save() return dialog @@ -26,10 +26,15 @@ def create_new_human(user_telegram_id, device_type, personality=None, profile=No return human @staticmethod - def create_new_human_utterance(text, user, date_time, annotations=None, selected_skills=None): - if isinstance(user, Bot): - raise RuntimeError( - 'Utterances of bots should be created with different method. See create_new_bot_utterance()') + def create_new_bot(persona: Optional[List[str]] = None): + bot = Bot() + if persona: + bot.persona = persona + bot.save() + return bot + + @staticmethod + def create_new_human_utterance(text, user: Human, date_time, annotations=None, selected_skills=None): utt = HumanUtterance(text=text, user=user, date_time=date_time, @@ -50,13 +55,6 @@ def create_new_bot_utterance(orig_text, text, user, date_time, active_skill, con utt.save() return utt - @staticmethod - def update_user_profile(me_user, profile): - me_user.profile.update(**profile) - me_user.save() - - # non batch shit - @classmethod def get_or_create_user(cls, user_telegram_id=Hashable, user_device_type=Any): user_query = Human.objects(user_telegram_id__exact=user_telegram_id) @@ -69,13 +67,12 @@ def get_or_create_user(cls, user_telegram_id=Hashable, user_device_type=Any): @classmethod def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): if should_reset: - dialog = cls.create_new_dialog(user=user, bot=BOT, location=location, + dialog = cls.create_new_dialog(user.id, location=location, channel_type=channel_type) else: - exist_dialogs = Dialog.objects(user__exact=user) + exist_dialogs = Dialog.objects(human_id__exact=user.id) if not exist_dialogs: - # TODO remove this "if" condition: it should never happen in production, only while testing - dialog = cls.create_new_dialog(user=user, bot=BOT, location=location, + dialog = cls.create_new_dialog(user.id, location=location, channel_type=channel_type) else: dialog = exist_dialogs[0] @@ -83,10 +80,10 @@ def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): return dialog @classmethod - def add_human_utterance(cls, dialog: Dialog, text: str, date_time: datetime, + def add_human_utterance(cls, dialog: Dialog, user: Human, text: str, date_time: datetime, annotation: Optional[dict] = None, selected_skill: Optional[dict] = None) -> None: - utterance = cls.create_new_human_utterance(text, dialog.user, date_time, annotation, selected_skill) + utterance = cls.create_new_human_utterance(text, user, date_time, annotation, selected_skill) dialog.utterances.append(utterance) dialog.save() @@ -96,7 +93,11 @@ def add_bot_utterance(cls, dialog: Dialog, orig_text: str, confidence: float, text: str = None, annotation: Optional[dict] = None) -> None: if not text: text = orig_text - utterance = cls.create_new_bot_utterance(orig_text, text, dialog.bot, date_time, active_skill, confidence, + try: + bot = dialog.utterances[-2].user + 
except IndexError: + bot = cls.create_new_bot() + utterance = cls.create_new_bot_utterance(orig_text, text, bot, date_time, active_skill, confidence, annotation) dialog.utterances.append(utterance) dialog.save() @@ -119,9 +120,10 @@ def add_text(dialog: Dialog, payload: str): dialog.utterances[-1].save() @classmethod - def add_bot_utterance_simple(cls, dialog: Dialog, payload: Dict): + def add_bot_response(cls, dialog: Dialog, payload: Dict): active_skill_name = list(payload.values())[0] - active_skill = dialog.utterances[-1].selected_skills.get(active_skill_name, None) + human_utterance = dialog.utterances[-1] + active_skill = human_utterance.selected_skills.get(active_skill_name, None) if not active_skill: raise ValueError(f'provided {payload} is not valid') @@ -129,7 +131,37 @@ def add_bot_utterance_simple(cls, dialog: Dialog, payload: Dict): confidence = active_skill['confidence'] cls.add_bot_utterance(dialog, text, datetime.now(), active_skill_name, confidence) + cls.update_human(human_utterance.user, active_skill) + cls.update_bot(dialog.utterances[-1].user, active_skill) @staticmethod def do_nothing(*args, **kwargs): # exclusive workaround for skill selector pass + + @staticmethod + def update_human(human: Human, active_skill: Dict): + attributes = active_skill.get('human_attributes', []) + profile = human.profile + if attributes: + for attr_name in attributes: + attr_value = active_skill['human_attributes'][attr_name] + if hasattr(human, attr_name): + setattr(human, attr_name, attr_value) + else: + if attr_name in profile.keys(): + profile[attr_name] = attr_value + else: + human.attributes[attr_name] = attr_value + human.save() + + @staticmethod + def update_bot(bot: Bot, active_skill: Dict): + attributes = active_skill.get('bot_attributes', []) + if attributes: + for attr_name in attributes: + attr_value = active_skill['bot_attributes'][attr_name] + if hasattr(bot, attr_name): + setattr(bot, attr_name, attr_value) + else: + bot.attributes[attr_name] = attr_value + bot.save() diff --git a/core/state_schema.py b/core/state_schema.py index 320c5eeb..6c407dc1 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -1,9 +1,14 @@ +import uuid + from mongoengine import DynamicDocument, ReferenceField, ListField, StringField, DynamicField, \ - DateTimeField, FloatField, DictField + DateTimeField, FloatField, DictField, ObjectIdField + +from . 
import STATE_API_VERSION class User(DynamicDocument): persona = ListField(default=[]) + attributes = DictField() meta = {'allow_inheritance': True} @@ -12,16 +17,12 @@ def to_dict(self): class Bot(User): - persona = ListField(default=['Мне нравится общаться с людьми.', - 'Пару лет назад я окончила вуз с отличием.', - 'Я работаю в банке.', - 'В свободное время помогаю пожилым людям в благотворительном фонде', - 'Люблю путешествовать']) def to_dict(self): return {'id': str(self.id), 'user_type': 'bot', 'persona': self.persona, + 'attributes': str(self.attributes) } @@ -45,12 +46,14 @@ def to_dict(self): 'user_type': 'human', 'device_type': self.device_type, 'persona': self.persona, - 'profile': self.profile} + 'profile': self.profile, + 'attributes': str(self.attributes) + } class Utterance(DynamicDocument): text = StringField(required=True) - annotations = DictField(default={'ner': {}, 'coref': {}, 'sentiment': {}, 'obscenity': {}}) + annotations = DictField(default={}) user = ReferenceField(User, required=True) date_time = DateTimeField(required=True) @@ -66,7 +69,7 @@ class HumanUtterance(Utterance): def to_dict(self): return {'id': str(self.id), 'text': self.text, - 'user_id': str(self.user.id), + 'user': self.user.to_dict(), 'annotations': self.annotations, 'date_time': str(self.date_time), 'selected_skills': self.selected_skills} @@ -75,7 +78,6 @@ def to_dict(self): class BotUtterance(Utterance): orig_text = StringField() active_skill = StringField() - user = ReferenceField(Bot, required=True) confidence = FloatField() def to_dict(self): @@ -85,7 +87,7 @@ def to_dict(self): 'confidence': self.confidence, 'text': self.text, 'orig_text': self.orig_text, - 'user_id': str(self.user.id), + 'user': self.user.to_dict(), 'annotations': self.annotations, 'date_time': str(self.date_time) } @@ -94,16 +96,16 @@ def to_dict(self): class Dialog(DynamicDocument): location = DynamicField() utterances = ListField(ReferenceField(Utterance), default=[]) - user = ReferenceField(Human, required=True) - bot = ReferenceField(Bot, required=True) channel_type = StringField(choices=['telegram', 'vk', 'facebook', 'cmd_client', 'http_client'], default='telegram') + version = StringField(default=STATE_API_VERSION, required=True) + human_id = ObjectIdField(required=True) def to_dict(self): return { 'id': str(self.id), 'location': self.location, 'utterances': [utt.to_dict() for utt in self.utterances], - 'user': self.user.to_dict(), - 'bot': self.bot.to_dict(), 'channel_type': self.channel_type, + 'human_id': self.human_id } + diff --git a/models/hardcode_utterances.py b/models/hardcode_utterances.py index 0223314e..c06c7f86 100644 --- a/models/hardcode_utterances.py +++ b/models/hardcode_utterances.py @@ -1,2 +1,7 @@ TG_START_UTT = '/start' NOANSWER_UTT = 'Я пока не на все вопросы умею отвечать, спроси что-нибудь другое. 
:)' +BOT_DEFAULT_PERSONA = ['Мне нравится общаться с людьми.', + 'Пару лет назад я окончила вуз с отличием.', + 'Я работаю в банке.', + 'В свободное время помогаю пожилым людям в благотворительном фонде', + 'Люблю путешествовать'] diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 25b78f00..6cf35c94 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -35,7 +35,7 @@ def base_input_formatter(state: List): annotations_histories.append(annotations_history) dialog_ids.append(dialog['id']) - user_ids.append(dialog['user']['id']) + user_ids.extend([utt['user']['id'] for utt in state[0]['utterances']]) return {'dialogs': state, 'last_utterances': last_utts, From 38c3e50cca11fb2429954d7179942bd40c829de9 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 20 Sep 2019 19:32:15 +0300 Subject: [PATCH 022/133] docs: services api and user/bot state api --- docs/source/api/services_http_api.rst | 111 ++++++++++++++++++++++++++ docs/source/api/user_state_api.rst | 36 +++++++++ docs/source/conf.py | 3 +- docs/source/index.rst | 18 +++-- docs/source/intro/overview.rst | 8 +- 5 files changed, 166 insertions(+), 10 deletions(-) create mode 100644 docs/source/api/services_http_api.rst create mode 100644 docs/source/api/user_state_api.rst diff --git a/docs/source/api/services_http_api.rst b/docs/source/api/services_http_api.rst new file mode 100644 index 00000000..e0586ffa --- /dev/null +++ b/docs/source/api/services_http_api.rst @@ -0,0 +1,111 @@ +There are 5 types of dialog services that can be connected to the `Agent's dialog pipeline `__: + + * **Annotators** + * **Skill Selector** + * **Skills** + * **Response Selector** + * **Postprocessor** + + +Input Format +============ + +All services get a standardized Agent State as input. The input format is described `here `__. + +To reformat Agent State format into your service's input format, you need to write a **formatter** function and +specify it's name into the Agent's `config file `__. You can use our DeepPavlov `formatters `__ +as example. + +Output Format +============= + +All services have it's own specified output format. If you need to reformat your service's response, you should use the same +formatter function that you used for the input format, just use the ``mode=='out'`` flag. + +Annotator +========= + +Annotator should return a free-form response. + +For example, the NER annotator may return a dictionary with ``tokens`` and ``tags`` keys: + + .. code:: json + + {"tokens": ["Paris"], "tags": ["I-LOC"]} + +For example, a Sentiment annotator can return a list of labels: + + .. code:: json + + ["neutral", "speech"] + +Also a Sentiment annotator can return just a string: + + .. code:: json + + "neutral" + +Skill Selector +============== + +Skill Selector should return a list of selected skill names. + +For example: + + .. code:: json + + ["chitchat", "hello_skill"] + + +Skill +===== + +Skill should return a dict with required ``text`` and ``confidence`` keys. If a skill wants to +update either **Human** or **Bot** profile, it should pack these attributes into ``human_attributes`` and +``bot_attributes`` keys. All attributes in ``human_attributes`` and ``bot_attributes`` will overwrite +current **Human** and **Bot** attribute values accordingly. And if there are no such attributes, they will be stored under +``attributes`` key inside **Human** or **Bot**. + +The minimum required response of a skill is a 2-key dictionary: + + + .. 
code:: json + + {"text": "hello", "confidence": 0.33} + +But it's possible to extend it with ``human_attributes`` and ``bot_attributes`` keys: + + .. code:: json + + {"text": "hello", "confidence": 0.33, "human_attributes": {"name": "Vasily"}, + "bot_attributes": {"persona": ["I like swimming.", "I have a nice swimming suit."]}} + +Response Selector +================= + +Unlike Skill Selector, Response Selector should select a *single* skill responsible for generation of the +final response shown to the user. The expected result is a name of the selected skill: + + .. code:: json + + "chitchat" + +Postprocessor +============= + +Postprocessor has a power to rewrite a final bot answer selected by the Response Selector. For example, it can +take a user's name from the state and add it to the final answer. + +It simply should return a rewritten answer. The rewritten answer will go the ``text`` field of the final +utterance shown to the user, and the original skill answer will go to the ``orig_text`` field. + + .. code:: json + + "Goodbye, Joe!" + + +.. _dialog-pipeline: https://deeppavlov-agent.readthedocs.io/en/latest/intro/overview.html#architecture-overview +.. _state: https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html +.. _config file: https://github.com/deepmipt/dp-agent/blob/master/config.py +.. _formatters: https://github.com/deepmipt/dp-agent/blob/master/state_formatters/dp_formatters.py + diff --git a/docs/source/api/user_state_api.rst b/docs/source/api/user_state_api.rst new file mode 100644 index 00000000..3fd0e0b3 --- /dev/null +++ b/docs/source/api/user_state_api.rst @@ -0,0 +1,36 @@ +User State API +============== + +Each utterance in a **Dialog** is generated either by a **Human** or by a **Bot**. To understand, which of two has generated +the utterance, refer to the ``user.user_type`` field: + + .. code:: json + + "utterances": [ + { + "user": { + "user_type": "human" + } + }] + +A `Skill `__ can update any fields in **User** (**Human** or **Bot**) objects. If a **Skill** updates a **Human**, +the **Human** fields will be changed in this utterance accordingly. If a **Skill** updates a **Bot**, the **Bot** fields will be +changed in the *next* (generated by the bot) utterance. + +Each new dialog starts with a new **Bot** with all default fields. However, the **Human** object is updated permanently, and +when a **Human** starts a new dialog, the object is retrieved from a database with all updated fields. + +The history of all changes made by skills to users can be looked up at the selected skills responses +in the ``selected_skills`` field of a human utterance: + + .. code:: json + + "utterances": [ + { + "user": { + "user_type": "human" + }, + "selected_skills": {} + }] + +.. _skill: https://deeppavlov-agent.readthedocs.io/en/latest/api/services_http_api.html#skill diff --git a/docs/source/conf.py b/docs/source/conf.py index 1395ae45..9822b882 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,8 @@ 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages' + 'sphinx.ext.githubpages', + 'sphinx.ext.autosectionlabel' ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index b28c30b0..ba860985 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,10 +8,18 @@ Welcome to DeepPavlov Agent documentation! 
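For reference, a minimal skill that satisfies the contract documented above might look like the sketch below. It is only an illustration: the ``/respond`` route, the port and the attribute values are assumptions, and one response per incoming dialog is assumed to match the skill's formatter.

    .. code:: python

        # hypothetical skill service: only the response keys follow the documented contract
        from aiohttp import web


        async def respond(request):
            state = await request.json()  # agent state, already reshaped by this skill's formatter
            responses = []
            for _ in state.get('dialogs', []):
                responses.append({
                    'text': 'Nice to meet you!',
                    'confidence': 0.5,
                    # optional: ask the agent to overwrite Human/Bot fields
                    'human_attributes': {'name': 'Vasily'},
                    'bot_attributes': {'persona': ['I like swimming.']},
                })
            return web.json_response(responses)


        app = web.Application()
        app.router.add_post('/respond', respond)

        if __name__ == '__main__':
            web.run_app(app, port=8080)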
:maxdepth: 2 :caption: Overview - Overview + intro/overview -Indices and tables -================== -* :ref:`genindex` -* :ref:`search` +.. toctree:: + :maxdepth: 2 + :caption: Services HTTP API + + api/services_http_api + + +.. toctree:: + :maxdepth: 2 + :caption: User State API + + api/user_state_api \ No newline at end of file diff --git a/docs/source/intro/overview.rst b/docs/source/intro/overview.rst index 6f6e6ae9..7259caaa 100644 --- a/docs/source/intro/overview.rst +++ b/docs/source/intro/overview.rst @@ -34,11 +34,11 @@ so the platform should have the following characteristics: * ``Postprocessor`` is a service postprocessing a bot utterance. It can make some basic things like adding a user name to the reply, inserting emojis, etc.; - * ``Postprocessed Response`` is a final postprocessed bot utterance that is shown to the user. +* ``Postprocessed Response`` is a final postprocessed bot utterance that is shown to the user. - * ``State`` is current dialogs between users and a bot serialized as **json**. State is used to pass information - across the services and contains all possibly needed information about the current dialogs. - It has separate `documentation `__. +* ``State`` is current dialogs between users and a bot serialized as **json**. State is used to pass information + across the services and contains all possibly needed information about the current dialogs. + It has separate `documentation `__. Ready Agent from the box From 0635e929cb24cad666080f6d8c885a69f1573638 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 20 Sep 2019 19:39:43 +0300 Subject: [PATCH 023/133] docs: update dev requirements --- dev_requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 46cce4b9..5690a6d1 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,3 +1,4 @@ sphinx==2.1.2 recommonmark==0.5.0 -sphinx_rtd_theme \ No newline at end of file +sphinx_rtd_theme +Pygments==2.4.2 \ No newline at end of file From d1279df5caff20c90c9e8b6776988322387c7c94 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 23 Sep 2019 17:34:05 +0300 Subject: [PATCH 024/133] feat: logging services time --- core/agent.py | 2 ++ core/run.py | 33 ++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/core/agent.py b/core/agent.py index ef303fb0..03e0699d 100644 --- a/core/agent.py +++ b/core/agent.py @@ -40,6 +40,8 @@ def get_workflow_record(self, dialog_id): def flush_record(self, dialog_id: str): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') + if self.response_logger_callable: + self.response_logger_callable(dialog_id, self.workflow) return self.workflow.pop(dialog_id) def register_service_request(self, dialog_id: str, service_name): diff --git a/core/run.py b/core/run.py index db8072dd..dfd2204c 100644 --- a/core/run.py +++ b/core/run.py @@ -1,6 +1,7 @@ import asyncio import argparse import uuid +import logging from aiohttp import web from datetime import datetime @@ -8,26 +9,46 @@ from aiohttp_swagger import * from core.agent import Agent -from core.pipeline import Pipeline, Service, simple_workflow_formatter +from core.pipeline import Pipeline, Service from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager from core.transform_config import DEBUG +logger = logging.getLogger('service_logger') 
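The ``service_logger`` configured here ends up writing one tab-separated ``<service>\t<seconds>\tseconds`` line per completed service call. A small consumer such as the sketch below (an assumed downstream helper, not part of this patch; the log path is illustrative) can aggregate those timings per service:

    .. code:: python

        # hypothetical reader for the tab-separated timing log written by service_logger
        from collections import defaultdict
        from statistics import mean


        def aggregate_timings(path='service.log'):
            per_service = defaultdict(list)
            with open(path) as log:
                for line in log:
                    name, seconds, _ = line.rstrip('\n').split('\t')
                    per_service[name].append(float(seconds))
            return {name: mean(times) for name, times in per_service.items()}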
+logger.setLevel(logging.INFO) +fh = logging.FileHandler('../service.log') +fh.setLevel(logging.INFO) +logger.addHandler(fh) + + parser = argparse.ArgumentParser() parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, choices=['cmd_client', 'http_client'], default='cmd_client') parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true') +parser.add_argument('-rl', '--response-logger', help='run agent with services response logging', action='store_true') args = parser.parse_args() CHANNEL = args.channel -def prepare_agent(services, endpoint: Service): +def response_logger(dialog_id, workflow): + for service_name, service_data in workflow[dialog_id]['services'].items(): + done = service_data['done'] + send = service_data['send'] + if send is None or done is None: + continue + logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') + + +def prepare_agent(services, endpoint: Service, use_response_logger: bool): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) - agent = Agent(pipeline, StateManager()) - + if use_response_logger: + response_logger_callable = response_logger + else: + response_logger_callable = None + agent = Agent(pipeline, StateManager(), response_logger_callable=response_logger_callable) return agent.register_msg, agent.process @@ -41,6 +62,7 @@ async def run(register_msg): deadline_timestamp=None, require_response=True) print('Bot: ', response['dialog'].utterances[-1].text) + async def on_shutdown(app): await app['client_session'].close() @@ -132,7 +154,7 @@ def main(): endpoint = Service('cmd_responder', EventSetOutputConnector(), None, 1, ['responder'], set()) loop = asyncio.get_event_loop() loop.set_debug(args.debug) - register_msg, process = prepare_agent(services, endpoint) + register_msg, process = prepare_agent(services, endpoint, use_response_logger=args.response_logger) future = asyncio.ensure_future(run(register_msg)) for i in workers: loop.create_task(i.call_service(process)) @@ -147,6 +169,7 @@ def main(): loop.run_until_complete(session.close()) loop.stop() loop.close() + logging.shutdown() elif CHANNEL == 'http_client': intermediate_storage = {} endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) From 05d715d12f79e8a9214cdf496b806e6971d349ec Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Mon, 23 Sep 2019 18:44:24 +0300 Subject: [PATCH 025/133] --wip-- [skip ci] --- .flake8 | 4 + .gitignore | 5 +- core/agent.py | 64 +++++++------- core/config_parser.py | 22 +++-- core/pipeline.py | 35 ++++---- core/run.py | 2 - core/state_manager.py | 91 ++++++++++++++++++-- core/state_schema.py | 118 +++++++++++++++++++++++-- tests/dummy_connectors_test_setup.py | 123 +++++++++++++++++++++++++++ 9 files changed, 391 insertions(+), 73 deletions(-) create mode 100644 .flake8 create mode 100644 tests/dummy_connectors_test_setup.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..839f398f --- /dev/null +++ b/.flake8 @@ -0,0 +1,4 @@ +[flake8] +max-line-length=120 +ignore=D100,D101,D102,D103,D107 +exclude=.git,__pycache__,build,dist diff --git a/.gitignore b/.gitignore index df272c15..5e450c5e 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,7 @@ venv.bak/ .idea/ #GIT -.git/ \ No newline at end of file +.git/ + +#vscode +.vscode/ \ No newline at end of file diff --git a/core/agent.py b/core/agent.py index 
03e0699d..e391f127 100644 --- a/core/agent.py +++ b/core/agent.py @@ -24,10 +24,11 @@ def __init__(self, pipeline: Pipeline, state_manager: StateManager, def add_workflow_record(self, dialog: Dialog, deadline_timestamp: Optional[float] = None, **kwargs): if str(dialog.id) in self.workflow.keys(): raise ValueError(f'dialog with id {dialog.id} is already in workflow') - workflow_record = {'dialog': dialog, 'services': defaultdict(dict)} + workflow_record = {'dialog_object': dialog, 'dialog': dialog.to_dict(), 'services': defaultdict(dict)} if deadline_timestamp: workflow_record['deadline_timestamp'] = deadline_timestamp - + if 'dialog_object' in kwargs: + raise ValueError("'dialog_object' is system reserved workflow record field") workflow_record.update(kwargs) self.workflow[str(dialog.id)] = workflow_record @@ -47,29 +48,23 @@ def flush_record(self, dialog_id: str): def register_service_request(self, dialog_id: str, service_name): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None} + self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None, 'skipped': False} def get_services_status(self, dialog_id: str): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - done, waiting = set(), set() + done, waiting, skipped = set(), set(), set() for key, value in self.workflow[dialog_id]['services'].items(): - if value['done'] is not None: + if value['skipped']: + skipped.add(key) + elif value['done'] is not None: done.add(key) else: waiting.add(key) - return done, waiting - - def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): - ''' - Ultimate method, which performs next operations: - 1. Updates workflow dict with completed service - 2. Updates dialog within workflow dict, using service update callable - 3. Asks pipeline for next services which became available on current stage workflow - 4. Modifies next services when processed responce is received from a selector service - 5. 
Returns next services in list form - ''' + return done, waiting, skipped + + async def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): workflow_record = self.get_workflow_record(dialog_id) # Updating workflow with service response @@ -77,33 +72,36 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res if service: self.workflow[dialog_id]['services'][service_name]['done'] = time() if response and service.state_processor_method: - service.state_processor_method(workflow_record['dialog'], response) + service.state_processor_method(dialog=workflow_record['dialog'], + dialog_object=workflow_record['dialog_object'], + payload=response) # Calculating next steps - done, waiting = self.get_services_status(dialog_id) - next_services = self.pipeline.get_next_services(done, waiting) + done, waiting, skipped = self.get_services_status(dialog_id) + next_services_dict = self.pipeline.get_next_services(done, waiting) # Processing the case, when service is skill selector if service and service.is_selector(): selected_services = list(response.values())[0] result = [] - for service in next_services: - if service.name not in selected_services: - self.workflow[dialog_id]['services'][service.name] = {'done': time(), 'send': None} + for name, service in next_services_dict.values(): + if name not in selected_services: + self.workflow[dialog_id]['services'][name] = {'done': None, 'send': None, 'skipped': True} else: result.append(service) next_services = result + else: + next_services = [service for name, service in next_services_dict.values if name not in skipped] if self.process_logger_callable: self.process_logger_callable(self.workflow['dialog_id']) # send dialog workflow record to further logging operations - + return next_services async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, date_time: datetime, location=Any, channel_type=str, deadline_timestamp=None, - require_response=False, **kwargs): - event = None + require_response=False, should_reset=False, **kwargs): hold_flush = False user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False @@ -111,10 +109,11 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, self.state_manager.add_human_utterance(dialog, user, utterance, date_time) if require_response: event = asyncio.Event() + kwargs['event'] = event hold_flush = True self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, event=event, hold_flush=hold_flush, **kwargs) - await self.process(str(dialog.id)) + await self.process(str(dialog.id), 'input', utterance) if require_response: await event.wait() workflow_record = self.get_workflow_record(str(dialog.id)) @@ -126,13 +125,13 @@ async def process(self, dialog_id, service_name=None, response=None): next_services = self.process_service_response(dialog_id, service_name, response) service_requests = [] - has_responder = False + has_responder = [] for service in next_services: self.register_service_request(dialog_id, service.name) payload = service.apply_workflow_formatter(workflow_record) - service_requests.append(service.connector_callable(payload)) + service_requests.append(service.connector_func(payload)) if service.is_responder(): - has_responder = True + has_responder.append(service) responses = await asyncio.gather(*service_requests, return_exceptions=True) @@ -144,5 +143,8 @@ async def process(self, 
dialog_id, service_name=None, response=None): tasks.append(self.process(dialog_id, service.name, response)) await asyncio.gather(*tasks) - if has_responder and not workflow_record.get('hold_flush', False): - self.flush_record(dialog_id) + if has_responder: # TODO(Pugin): this part breaks some processing logic on the end + for i in has_responder: + i.state_processor_method(workflow_record['dialog'], workflow_record['dialog_object'], None) + if not workflow_record.get('hold_flush', False): + self.flush_record(dialog_id) diff --git a/core/config_parser.py b/core/config_parser.py index 4a636d91..0386dd17 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -21,18 +21,22 @@ def make_service_from_config_rec(conf_record, session, state_processor_method, t formatter = conf_record['formatter'] batch_size = conf_record.get('batch_size', 1) url = conf_record['url'] - url2 = conf_record.get('url2', None) + if conf_record['protocol'] == 'http': - if batch_size == 1 and not url2: - connector = HTTPConnector(session, url, formatter, conf_record['name']) + if batch_size == 1 and isinstance(url, str): + connector_func = HTTPConnector(session, url, formatter, conf_record['name']).send else: queue = asyncio.Queue() - connector = AioQueueConnector(queue) # worker task and queue connector - worker_tasks.append(QueueListenerBatchifyer(session, url, formatter, name, queue, batch_size)) - if url2: - worker_tasks.append(QueueListenerBatchifyer(session, url2, formatter, name, queue, batch_size)) - - service = Service(name, connector, state_processor_method, batch_size, + connector_func = AioQueueConnector(queue).send # worker task and queue connector + if isinstance(url, str): + urls = [url] + else: + urls = url + for u in urls: + worker_tasks.append(QueueListenerBatchifyer(session, u, formatter, + name, queue, batch_size)) + + service = Service(name, connector_func, state_processor_method, batch_size, tags, names_previous_services, simple_workflow_formatter) return service, worker_tasks diff --git a/core/pipeline.py b/core/pipeline.py index 419956d2..7773202e 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -2,19 +2,16 @@ class Service: - def __init__(self, name, connector=None, state_processor_method=None, + def __init__(self, name, connector_func, state_processor_method=None, batch_size=1, tags=None, names_previous_services=None, - workflow_formatter=None, connector_callable=None): + workflow_formatter=None): self.name = name self.batch_size = batch_size self.state_processor_method = state_processor_method self.names_previous_services = names_previous_services or set() self.tags = tags or [] self.workflow_formatter = workflow_formatter - if not (connector or connector_callable): - raise ValueError('Either connector or connector_callable should be provided') - self.connector = connector - self._connector_callable = connector_callable + self.connector_func = connector_func self.previous_services = set() self.next_services = set() @@ -24,18 +21,14 @@ def is_selector(self): def is_responder(self): return 'responder' in self.tags + def is_input(self): + return 'input' in self.tags + def apply_workflow_formatter(self, workflow_record): if not self.workflow_formatter: return workflow_record return self.workflow_formatter(workflow_record) - @property - def connector_callable(self): - if self._connector_callable: - return self._connector_callable - else: - return self.connector.send - class Pipeline: def __init__(self, services): @@ -75,16 +68,16 @@ def get_next_services(self, done=None, 
waiting=None): waiting = set() removed_names = waiting | done for name, service in self.services.items(): - if not {i.name for i in service.previous_services} <= done: + if not {i.name for i in service.previous_services} <= done or service.is_input(): removed_names.add(name) - return [service for name, service in self.services.items() if name not in removed_names] + return {name: service for name, service in self.services.items() if name not in removed_names} def get_endpoint_services(self): return [s for s in self.services.values() if not s.next_services and 'responder' not in s.tags] def add_responder_service(self, service): - if 'responder' not in service.tags: + if not service.is_responder(): raise ValueError('service should be a responder') endpoints = self.get_endpoint_services() service.previous_services = set(endpoints) @@ -94,6 +87,16 @@ def add_responder_service(self, service): for s in endpoints: self.services[s.name].next_services.add(service) + def add_input_service(self, service): + if not service.is_input(): + raise ValueError('service should be an input') + starting_services = self.get_next_services() + service.next_services = set(starting_services) + self.services[service.name] = service + + for s in starting_services: + self.services[s.name].previous_services.add(service) + def simple_workflow_formatter(workflow_record): return workflow_record['dialog'].to_dict() diff --git a/core/run.py b/core/run.py index dfd2204c..86523a89 100644 --- a/core/run.py +++ b/core/run.py @@ -6,7 +6,6 @@ from aiohttp import web from datetime import datetime from string import hexdigits -from aiohttp_swagger import * from core.agent import Agent from core.pipeline import Pipeline, Service @@ -73,7 +72,6 @@ async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown_f app.router.add_post('/', handle_func) app.router.add_get('/dialogs', users_dialogs) app.router.add_get('/dialogs/{dialog_id}', dialog) - setup_swagger(app, swagger_url='/docs') app.on_startup.append(on_startup) app.on_shutdown.append(on_shutdown_func) return app diff --git a/core/state_manager.py b/core/state_manager.py index c1e07663..26306ecb 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -1,7 +1,8 @@ from datetime import datetime from typing import Hashable, Any, Optional, Dict, TypeVar, List +from copy import deepcopy -from core.state_schema import User, Human, Bot, HumanUtterance, BotUtterance, Dialog +from core.state_schema import User, Human, Bot, HumanUtterance, BotUtterance, Dialog, HUMAN_UTTERANCE_SCHEMA, BOT_UTTERANCE_SCHEMA, BOT_SCHEMA, HUMAN_SCHEMA, DIALOG_SCHEMA from core.connection import connect userT = TypeVar('userT', bound=User) @@ -10,8 +11,8 @@ class StateManager: @staticmethod - def create_new_dialog(human_id, location=None, channel_type=None): - dialog = Dialog(human_id=human_id, location=location or Dialog.location.default, + def create_new_dialog(human, bot, location=None, channel_type=None): + dialog = Dialog(human=human, bot=bot, location=location or Dialog.location.default, channel_type=channel_type) dialog.save() return dialog @@ -67,12 +68,13 @@ def get_or_create_user(cls, user_telegram_id=Hashable, user_device_type=Any): @classmethod def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): if should_reset: - dialog = cls.create_new_dialog(user.id, location=location, + bot = cls.create_new_bot() + dialog = cls.create_new_dialog(human=user, bot=bot, location=location, channel_type=channel_type) else: - exist_dialogs = 
Dialog.objects(human_id__exact=user.id) + exist_dialogs = Dialog.objects(human__exact=user) if not exist_dialogs: - dialog = cls.create_new_dialog(user.id, location=location, + dialog = cls.create_new_dialog(human=user, bot=bot, location=location, channel_type=channel_type) else: dialog = exist_dialogs[0] @@ -165,3 +167,80 @@ def update_bot(bot: Bot, active_skill: Dict): else: bot.attributes[attr_name] = attr_value bot.save() + + @classmethod + def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: + utterance = HUMAN_UTTERANCE_SCHEMA + utterance['text'] = payload + utterance['date_time'] = str(datetime.now()) + utterance['user'] = dialog['user'] + dialog['utterances'].append(utterance) + + @staticmethod + def update_human_dict(human: Dict, active_skill: Dict): + attributes = active_skill.get('human_attributes', {}) + for attr_name, attr_value in attributes.items(): + if attr_name in human: + human[attr_name] = attr_value + elif attr_name in human['profile']: + human['profile'][attr_name] = attr_value + else: + human['attributes'][attr_name] = attr_value + + + @staticmethod + def update_bot_dict(bot: Dict, active_skill: Dict): + attributes = active_skill.get('bot_attributes', {}) + for attr_name, attr_value in attributes.items(): + if attr_name in bot: + bot[attr_name] = attr_value + else: + bot['attributes'][attr_name] = attr_value + + @classmethod + def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: + active_skill_name = list(payload.values())[0] + active_skill = dialog['utterances'][-1]['selected_skills'].get(active_skill_name, None) + if not active_skill: + raise ValueError(f'provided {payload} is not valid') + cls.update_human_dict(dialog['human'], active_skill) + cls.update_bot_dict(dialog['bot'], active_skill) + + utterance = BOT_UTTERANCE_SCHEMA + utterance['text'] = active_skill['text'] + utterance['orig_text'] = active_skill['text'] + utterance['date_time'] = str(datetime.now()) + utterance['active_skill'] = active_skill_name + utterance['confidence'] = active_skill['confidence'] + utterance['user'] = dialog['bot'] + dialog['utterances'].append(utterance) + + @staticmethod + def add_annotation_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs): + dialog['utterances'][-1]['annotations'].update(payload) + + @staticmethod + def add_selected_skill_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs): + dialog['utterances'][-1]['selected_skills'].update(payload) + + @staticmethod + def add_text_dict(dialog: Dict, payload: str): + dialog['utterances'][-1]['text'] = payload + + @staticmethod + def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): + utt_objects = [] + for utt in dialog['utterances'][::-1]: + if not utt['id']: + if utt['type'] == 'human': + utt_objects.append(HumanUtterance.from_dict(utt)) + elif utt['type'] == 'bot': + utt_objects.append(BotUtterance.from_dict(utt)) + else: + raise ValueError('utterance of unknown type') + else: + break + for utt in utt_objects[::-1]: + dialog_object.utterances.append(utt) + + dialog_object.save() \ No newline at end of file diff --git a/core/state_schema.py b/core/state_schema.py index 6c407dc1..8270327f 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -4,7 +4,62 @@ DateTimeField, FloatField, DictField, ObjectIdField from . 
import STATE_API_VERSION - +from datetime import datetime + +HUMAN_UTTERANCE_SCHEMA = { + 'id': None, + 'text': None, + 'user': {}, + 'annotations': {}, + 'date_time': None, + 'selected_skills': {}, + 'type': 'human' +} + +BOT_UTTERANCE_SCHEMA = { + 'id': None, + 'active_skill': None, + 'confidence': None, + 'text': None, + 'orig_text': None, + 'user': {}, + 'annotations': {}, + 'date_time': None, + 'type': 'bot' +} + +BOT_SCHEMA = { + 'id': None, + 'persona': [], + 'attributes': {} +} + +HUMAN_SCHEMA = { + 'id': None, + 'user_telegram_id': None, + 'device_type': None, + 'persona': [], + 'profile': { + "name": None, + "gender": None, + "birthdate": None, + "location": None, + "home_coordinates": None, + "work_coordinates": None, + "occupation": None, + "income_per_year": None + }, + 'attributes': {} +} + +DIALOG_SCHEMA = { + 'id': [], + 'location': [], + 'utterances': [], + 'channel_type': None, + 'human': None, + 'bot': None +} class User(DynamicDocument): persona = ListField(default=[]) @@ -24,6 +79,11 @@ def to_dict(self): 'persona': self.persona, 'attributes': str(self.attributes) } + @classmethod + def from_dict(cls, payload): + bot = cls() + + return bot class Human(User): @@ -37,7 +97,7 @@ class Human(User): "home_coordinates": None, "work_coordinates": None, "occupation": None, - "income_per_year": None + "income_per_year": None, }) def to_dict(self): @@ -49,12 +109,18 @@ def to_dict(self): 'profile': self.profile, 'attributes': str(self.attributes) } + + @classmethod + def from_dict(cls, payload): + human = cls() + + return human class Utterance(DynamicDocument): text = StringField(required=True) annotations = DictField(default={}) - user = ReferenceField(User, required=True) + user = DictField(default={}) date_time = DateTimeField(required=True) meta = {'allow_inheritance': True} @@ -69,10 +135,23 @@ class HumanUtterance(Utterance): def to_dict(self): return {'id': str(self.id), 'text': self.text, - 'user': self.user.to_dict(), + 'user': self.user, 'annotations': self.annotations, 'date_time': str(self.date_time), - 'selected_skills': self.selected_skills} + 'selected_skills': self.selected_skills}, + 'type': 'human' + + @classmethod + def from_dict(cls, payload): + utterance = cls() + utterance.id = payload['id'] + utterance.text = payload['text'] + utterance.annotations = payload['annotations'] + utterance.date_time = payload['date_time'] + utterance.selected_skills = payload['selected_skills'] + utterance.user = payload['user'] + utterance.save() + return utterance class BotUtterance(Utterance): @@ -89,16 +168,32 @@ def to_dict(self): 'orig_text': self.orig_text, 'user': self.user.to_dict(), 'annotations': self.annotations, - 'date_time': str(self.date_time) + 'date_time': str(self.date_time), + 'type': 'bot' } + @classmethod + def from_dict(cls, payload): + utterance = cls() + utterance.id = payload['id'] + utterance.text = payload['text'] + utterance.orig_text = payload['orig_text'] + utterance.annotations = payload['annotations'] + utterance.date_time = payload['date_time'] + utterance.active_skill = payload['active_skill'] + utterance.confidence = payload['confidence'] + utterance.user = payload['user'] + utterance.save() + return utterance + class Dialog(DynamicDocument): location = DynamicField() utterances = ListField(ReferenceField(Utterance), default=[]) channel_type = StringField(choices=['telegram', 'vk', 'facebook', 'cmd_client', 'http_client'], default='telegram') version = StringField(default=STATE_API_VERSION, required=True) - human_id = 
ObjectIdField(required=True) + human = ReferenceField(Human, required=True) + bot = ReferenceField(Bot, required=True) def to_dict(self): return { @@ -106,6 +201,13 @@ def to_dict(self): 'location': self.location, 'utterances': [utt.to_dict() for utt in self.utterances], 'channel_type': self.channel_type, - 'human_id': self.human_id + 'human': self.human.to_dict(), + 'bot': self.bot.to_dict() } + @classmethod + def from_dict(cls, payload): + dialog = cls() + dialog.location = payload['location'] + dialog.channel_type = payload['channel_type'] + return dialog diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py new file mode 100644 index 00000000..c13eb3a1 --- /dev/null +++ b/tests/dummy_connectors_test_setup.py @@ -0,0 +1,123 @@ +import asyncio +import argparse +import uuid + + +from aiohttp import web +from datetime import datetime +from string import hexdigits +from random import choice + +from core.agent import Agent +from core.pipeline import Pipeline, Service +from core.connectors import HttpOutputConnector +from core.config_parser import parse_old_config +from core.state_manager import StateManager +from core.run import prepare_agent + +parser = argparse.ArgumentParser() +parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) +args = parser.parse_args() +CHANNEL = 'vk' + + +class DummyConnector: + def __init__(self, returns, sleeptime, service_name): + self.returns = returns + self.sleeptime = sleeptime + self.service_name = service_name + + async def send(self, payload): + await asyncio.sleep(self.sleeptime) + return {self.service_name: {"text": choice(self.returns), "confidence": 0.5}} + + +class DummySelectorConnector: + def __init__(self, returns, sleeptime, service_name): + self.returns = returns + self.sleeptime = sleeptime + self.service_name = service_name + + async def send(self, payload): + await asyncio.sleep(self.sleeptime) + return {self.service_name: self.returns} + + +async def on_shutdown(app): + await app['client_session'].close() + + +async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown_func=on_shutdown): + app = web.Application(debug=True) + handle_func = await api_message_processor(register_msg, intermediate_storage) + app.router.add_post('/', handle_func) + app.on_startup.append(on_startup) + app.on_shutdown.append(on_shutdown_func) + return app + + +def prepare_startup(consumers, process_callable, session): + result = [] + for i in consumers: + result.append(asyncio.ensure_future(i.call_service(process_callable))) + + async def startup_background_tasks(app): + app['consumers'] = result + app['client_session'] = session + + return startup_background_tasks + + +async def api_message_processor(register_msg, intermediate_storage): + async def api_handle(request): + user_id = None + bot_response = None + if request.method == 'POST': + if request.headers.get('content-type') != 'application/json': + raise web.HTTPBadRequest(reason='Content-Type should be application/json') + data = await request.json() + user_id = data.get('user_id') + payload = data.get('payload', '') + + if not user_id: + raise web.HTTPBadRequest(reason='user_id key is required') + + event = asyncio.Event() + message_uuid = uuid.uuid4().hex + await register_msg(utterance=payload, user_telegram_id=user_id, user_device_type='http', + date_time=datetime.now(), location='', channel_type=CHANNEL, + event=event, message_uuid=message_uuid) + await event.wait() + bot_response = 
intermediate_storage.pop(message_uuid) + + if bot_response is None: + raise RuntimeError('Got None instead of a bot response.') + + return web.json_response({'user_id': user_id, 'response': bot_response}) + + return api_handle + + +def main(): + services, workers, session = parse_old_config() + + for s in services: + if 'RESPONSE_SELECTORS' in s.tags: + continue + if s.is_selector(): + s.connector_func = DummySelectorConnector(['chitchat', 'odqa'], 0.01, s.name).send + else: + s.connector_func = DummyConnector(['we have a phrase', 'and another one', 'not so short one'], 0.01, s.name).send + intermediate_storage = {} + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, + StateManager.save_dialog_dict, 1, ['responder']) + input = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) + register_msg, process_callable = prepare_agent(services, endpoint, input) + app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), + on_shutdown) + + web.run_app(app, port=args.port) + + +if __name__ == '__main__': + main() From 54e923a55f7e55c8fb22a7d770363780efe3756c Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 24 Sep 2019 11:38:39 +0300 Subject: [PATCH 026/133] changes in database processing with new userprocessing and other features --- core/agent.py | 27 ++++++++++--------------- core/config_parser.py | 22 ++++++++++---------- core/connectors.py | 2 +- core/pipeline.py | 4 ++-- core/run.py | 17 ++++++++++------ core/state_manager.py | 14 +++++++++---- core/state_schema.py | 47 ++++++++++++++++++++++--------------------- 7 files changed, 70 insertions(+), 63 deletions(-) diff --git a/core/agent.py b/core/agent.py index e391f127..b7954df4 100644 --- a/core/agent.py +++ b/core/agent.py @@ -48,23 +48,21 @@ def flush_record(self, dialog_id: str): def register_service_request(self, dialog_id: str, service_name): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None, 'skipped': False} + self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None} def get_services_status(self, dialog_id: str): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - done, waiting, skipped = set(), set(), set() + done, waiting = set(), set() for key, value in self.workflow[dialog_id]['services'].items(): - if value['skipped']: - skipped.add(key) - elif value['done'] is not None: + if value['done'] is not None: done.add(key) else: waiting.add(key) - return done, waiting, skipped + return done, waiting - async def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): + def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): workflow_record = self.get_workflow_record(dialog_id) # Updating workflow with service response @@ -77,21 +75,19 @@ async def process_service_response(self, dialog_id: str, service_name: str = Non payload=response) # Calculating next steps - done, waiting, skipped = self.get_services_status(dialog_id) - next_services_dict = self.pipeline.get_next_services(done, waiting) + done, waiting = self.get_services_status(dialog_id) + next_services = self.pipeline.get_next_services(done, waiting) # Processing the case, when service is skill selector if service and service.is_selector(): 
selected_services = list(response.values())[0] result = [] - for name, service in next_services_dict.values(): - if name not in selected_services: - self.workflow[dialog_id]['services'][name] = {'done': None, 'send': None, 'skipped': True} + for service in next_services: + if service.name not in selected_services: + self.workflow[dialog_id]['services'][service.name] = {'done': time(), 'send': None} else: result.append(service) next_services = result - else: - next_services = [service for name, service in next_services_dict.values if name not in skipped] if self.process_logger_callable: self.process_logger_callable(self.workflow['dialog_id']) # send dialog workflow record to further logging operations @@ -106,13 +102,12 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False dialog = self.state_manager.get_or_create_dialog(user, location, channel_type, should_reset=should_reset) - self.state_manager.add_human_utterance(dialog, user, utterance, date_time) if require_response: event = asyncio.Event() kwargs['event'] = event hold_flush = True self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, - event=event, hold_flush=hold_flush, **kwargs) + hold_flush=hold_flush, **kwargs) await self.process(str(dialog.id), 'input', utterance) if require_response: await event.wait() diff --git a/core/config_parser.py b/core/config_parser.py index 0386dd17..85924aeb 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -45,7 +45,7 @@ def add_bot_to_name(name): return f'bot_{name}' for anno in ANNOTATORS_1: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['ANNOTATORS_1'], set()) services.append(service) worker_tasks.extend(workers) @@ -54,7 +54,7 @@ def add_bot_to_name(name): if ANNOTATORS_2: for anno in ANNOTATORS_2: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['ANNOTATORS_2'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -63,7 +63,7 @@ def add_bot_to_name(name): if ANNOTATORS_3: for anno in ANNOTATORS_3: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['ANNOTATORS_3'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -81,7 +81,7 @@ def add_bot_to_name(name): if SKILLS: for s in SKILLS: - service, workers = make_service_from_config_rec(s, session, StateManager.add_selected_skill, + service, workers = make_service_from_config_rec(s, session, StateManager.add_selected_skill_dict, ['SKILLS'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -89,12 +89,12 @@ def add_bot_to_name(name): previous_services = {i.name for i in services if 'SKILLS' in i.tags} if not RESPONSE_SELECTORS: - services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector(), - StateManager.add_bot_response, + services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector().send, + StateManager.add_bot_utterance_simple_dict, 1, ['RESPONSE_SELECTORS'], previous_services, 
simple_workflow_formatter)) else: for r in RESPONSE_SELECTORS: - service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_response, + service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple_dict, ['RESPONSE_SELECTORS'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -103,7 +103,7 @@ def add_bot_to_name(name): if POSTPROCESSORS: for p in POSTPROCESSORS: - service, workers = make_service_from_config_rec(p, session, StateManager.add_text, + service, workers = make_service_from_config_rec(p, session, StateManager.add_text_dict, ['POSTPROCESSORS'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -112,7 +112,7 @@ def add_bot_to_name(name): if ANNOTATORS_1: for anno in ANNOTATORS_1: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['POST_ANNOTATORS_1'], previous_services, add_bot_to_name) services.append(service) worker_tasks.extend(workers) @@ -121,7 +121,7 @@ def add_bot_to_name(name): if ANNOTATORS_2: for anno in ANNOTATORS_2: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['POST_ANNOTATORS_2'], previous_services, add_bot_to_name) services.append(service) worker_tasks.extend(workers) @@ -129,7 +129,7 @@ def add_bot_to_name(name): previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} for anno in ANNOTATORS_3: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation, ['POST_ANNOTATORS_3'], + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['POST_ANNOTATORS_3'], previous_services, add_bot_to_name) services.append(service) worker_tasks.extend(workers) diff --git a/core/connectors.py b/core/connectors.py index fccbafe5..30afbe0a 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -65,7 +65,7 @@ def __init__(self, intermediate_storage: Dict): async def send(self, payload): message_uuid = payload['message_uuid'] event = payload['event'] - response_text = payload['dialog'].utterances[-1].text + response_text = payload['dialog']['utterances'][-1]['text'] self.intermediate_storage[message_uuid] = response_text event.set() diff --git a/core/pipeline.py b/core/pipeline.py index 7773202e..e686849a 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -71,7 +71,7 @@ def get_next_services(self, done=None, waiting=None): if not {i.name for i in service.previous_services} <= done or service.is_input(): removed_names.add(name) - return {name: service for name, service in self.services.items() if name not in removed_names} + return [service for name, service in self.services.items() if name not in removed_names] def get_endpoint_services(self): return [s for s in self.services.values() if not s.next_services and 'responder' not in s.tags] @@ -99,4 +99,4 @@ def add_input_service(self, service): def simple_workflow_formatter(workflow_record): - return workflow_record['dialog'].to_dict() + return workflow_record['dialog'] diff --git a/core/run.py b/core/run.py index 86523a89..2b03e6e6 100644 --- a/core/run.py +++ b/core/run.py @@ -40,9 +40,10 @@ def response_logger(dialog_id, workflow): logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') -def prepare_agent(services, endpoint: 
Service, use_response_logger: bool): +def prepare_agent(services, endpoint: Service, input_serv: Service, use_response_logger: bool): pipeline = Pipeline(services) pipeline.add_responder_service(endpoint) + pipeline.add_input_service(input_serv) if use_response_logger: response_logger_callable = response_logger else: @@ -59,7 +60,7 @@ async def run(register_msg): response = await register_msg(utterance=msg, user_telegram_id=user_id, user_device_type='cmd', date_time=datetime.now(), location='lab', channel_type=CHANNEL, deadline_timestamp=None, require_response=True) - print('Bot: ', response['dialog'].utterances[-1].text) + print('Bot: ', response['dialog']['utterances'][-1]['text']) async def on_shutdown(app): @@ -149,10 +150,12 @@ def main(): services, workers, session = parse_old_config() if CHANNEL == 'cmd_client': - endpoint = Service('cmd_responder', EventSetOutputConnector(), None, 1, ['responder'], set()) + endpoint = Service('cmd_responder', EventSetOutputConnector().send, + StateManager.save_dialog_dict, 1, ['responder']) + input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) loop = asyncio.get_event_loop() loop.set_debug(args.debug) - register_msg, process = prepare_agent(services, endpoint, use_response_logger=args.response_logger) + register_msg, process = prepare_agent(services, endpoint, input_srv, use_response_logger=args.response_logger) future = asyncio.ensure_future(run(register_msg)) for i in workers: loop.create_task(i.call_service(process)) @@ -170,8 +173,10 @@ def main(): logging.shutdown() elif CHANNEL == 'http_client': intermediate_storage = {} - endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage), None, 1, ['responder']) - register_msg, process_callable = prepare_agent(services, endpoint) + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, + StateManager.save_dialog_dict, 1, ['responder']) + input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) + register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown) diff --git a/core/state_manager.py b/core/state_manager.py index 26306ecb..55b14d6e 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -37,7 +37,7 @@ def create_new_bot(persona: Optional[List[str]] = None): @staticmethod def create_new_human_utterance(text, user: Human, date_time, annotations=None, selected_skills=None): utt = HumanUtterance(text=text, - user=user, + user=user.to_dict(), date_time=date_time, annotations=annotations or HumanUtterance.annotations.default, selected_skills=selected_skills or HumanUtterance.selected_skills.default) @@ -74,6 +74,7 @@ def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): else: exist_dialogs = Dialog.objects(human__exact=user) if not exist_dialogs: + bot = cls.create_new_bot() dialog = cls.create_new_dialog(human=user, bot=bot, location=location, channel_type=channel_type) else: @@ -173,7 +174,7 @@ def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, pa utterance = HUMAN_UTTERANCE_SCHEMA utterance['text'] = payload utterance['date_time'] = str(datetime.now()) - utterance['user'] = dialog['user'] + utterance['user'] = dialog['human'] dialog['utterances'].append(utterance) @staticmethod @@ -233,9 +234,9 @@ def save_dialog_dict(dialog: Dict, dialog_object: 
Dialog, payload=None): for utt in dialog['utterances'][::-1]: if not utt['id']: if utt['type'] == 'human': - utt_objects.append(HumanUtterance.from_dict(utt)) + utt_objects.append(HumanUtterance.make_from_dict(utt)) elif utt['type'] == 'bot': - utt_objects.append(BotUtterance.from_dict(utt)) + utt_objects.append(BotUtterance.make_from_dict(utt)) else: raise ValueError('utterance of unknown type') else: @@ -243,4 +244,9 @@ def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): for utt in utt_objects[::-1]: dialog_object.utterances.append(utt) + dialog_object.human.update_from_dict(dialog['human']) + dialog_object.bot.update_from_dict(dialog['bot']) + dialog_object.human.save() + dialog_object.bot.save() + dialog_object.save() \ No newline at end of file diff --git a/core/state_schema.py b/core/state_schema.py index 8270327f..dffd5579 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -77,13 +77,12 @@ def to_dict(self): return {'id': str(self.id), 'user_type': 'bot', 'persona': self.persona, - 'attributes': str(self.attributes) + 'attributes': self.attributes } - @classmethod - def from_dict(cls, payload): - bot = cls() - - return bot + + def update_from_dict(self, payload): + self.persona = payload['persona'] + self.attributes = payload['attributes'] class Human(User): @@ -107,14 +106,14 @@ def to_dict(self): 'device_type': self.device_type, 'persona': self.persona, 'profile': self.profile, - 'attributes': str(self.attributes) + 'attributes': self.attributes } - @classmethod - def from_dict(cls, payload): - human = cls() - - return human + def update_from_dict(self, payload): + self.device_type = payload['device_type'] + self.persona = payload['persona'] + self.profile = payload['profile'] + self.attributes = payload['attributes'] class Utterance(DynamicDocument): @@ -133,16 +132,18 @@ class HumanUtterance(Utterance): selected_skills = DynamicField(default=[]) def to_dict(self): - return {'id': str(self.id), - 'text': self.text, - 'user': self.user, - 'annotations': self.annotations, - 'date_time': str(self.date_time), - 'selected_skills': self.selected_skills}, - 'type': 'human' + return { + 'id': str(self.id), + 'text': self.text, + 'user': self.user, + 'annotations': self.annotations, + 'date_time': str(self.date_time), + 'selected_skills': self.selected_skills, + 'type': 'human' + } @classmethod - def from_dict(cls, payload): + def make_from_dict(cls, payload): utterance = cls() utterance.id = payload['id'] utterance.text = payload['text'] @@ -166,14 +167,14 @@ def to_dict(self): 'confidence': self.confidence, 'text': self.text, 'orig_text': self.orig_text, - 'user': self.user.to_dict(), + 'user': self.user, 'annotations': self.annotations, 'date_time': str(self.date_time), 'type': 'bot' } @classmethod - def from_dict(cls, payload): + def make_from_dict(cls, payload): utterance = cls() utterance.id = payload['id'] utterance.text = payload['text'] @@ -206,7 +207,7 @@ def to_dict(self): } @classmethod - def from_dict(cls, payload): + def make_from_dict(cls, payload): dialog = cls() dialog.location = payload['location'] dialog.channel_type = payload['channel_type'] From 71c9691b530a07987e92a65e88f4f8deb878797f Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 24 Sep 2019 12:01:49 +0300 Subject: [PATCH 027/133] stress test and dummy services test setup --- tests/dummy_connectors_test_setup.py | 4 +- utils/http_api_stress_test.py | 60 ++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 
utils/http_api_stress_test.py diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index c13eb3a1..b3fa225e 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -111,8 +111,8 @@ def main(): intermediate_storage = {} endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, StateManager.save_dialog_dict, 1, ['responder']) - input = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) - register_msg, process_callable = prepare_agent(services, endpoint, input) + input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) + register_msg, process_callable = prepare_agent(services, endpoint, input_srv, False) app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown) diff --git a/utils/http_api_stress_test.py b/utils/http_api_stress_test.py new file mode 100644 index 00000000..d680c051 --- /dev/null +++ b/utils/http_api_stress_test.py @@ -0,0 +1,60 @@ +import aiohttp +import asyncio +import argparse +import csv +import json +from time import time +from random import random, randrange +import uuid +from statistics import mean, median + + +parser = argparse.ArgumentParser() +parser.add_argument('-u', '--url', type=str) +parser.add_argument('-pf', '--phrasesfile', help='name of the file with phrases for dialog', type=str, default="") +parser.add_argument('-of', '--outputfile', help='name of the output file', type=str, default='output.csv') +parser.add_argument('-mnu', '--minusers', type=int, default=1) +parser.add_argument('-mxu', '--maxusers', type=int, default=10) + +args = parser.parse_args() + +try: + with open(args.phrasesfile, 'r') as file: + payloads = [line.rstrip('\n') for line in file] +except Exception as e: + raise e + +async def perform_test_dialogue(session, url, uuid, payloads): + times = [] + for i in payloads: + request_body = {'user_id': uuid, 'payload': i} + start_time = time() + async with session.post(url, json=request_body) as resp: + response = await resp.json() + end_time = time() + if response['user_id'] != uuid: + print('INFO, request returned wrong uuid') + + times.append(end_time - start_time) + + return times + +async def run_users(url, payload, mnu, mxu): + async with aiohttp.ClientSession() as session: + for i in range(mnu, mxu + 1): + tasks = [] + for j in range(0, i): + user_id = uuid.uuid4().hex + tasks.append(asyncio.ensure_future(perform_test_dialogue(session, url, user_id, payload))) + responses = await asyncio.gather(*tasks) + times = [] + for resp in responses: + times.extend(resp) + + print(f'test No {i} finished: {max(times)} {min(times)} {mean(times)} {median(times)}') + + +if __name__ == '__main__': + loop = asyncio.get_event_loop() + future = asyncio.ensure_future(run_users(args.url, payloads, args.minusers, args.maxusers)) + loop.run_until_complete(future) From bb3ac2dc4cdae057d3b0039cb21ac1cc7fa8e298 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 24 Sep 2019 14:58:30 +0300 Subject: [PATCH 028/133] fix utt update state manager --- core/state_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/state_manager.py b/core/state_manager.py index 55b14d6e..daf465b9 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -242,6 +242,7 @@ def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): else: break for utt in utt_objects[::-1]: + utt.save() dialog_object.utterances.append(utt) 
dialog_object.human.update_from_dict(dialog['human']) From 0339235457c937cc6e56c4dc9439f55ffc196811 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 24 Sep 2019 17:34:24 +0300 Subject: [PATCH 029/133] style: minor style fixes, logger fixes, removed connection.py --- core/connection.py | 5 ---- tests/schema_test.py | 55 -------------------------------------------- 2 files changed, 60 deletions(-) delete mode 100644 core/connection.py delete mode 100644 tests/schema_test.py diff --git a/core/connection.py b/core/connection.py deleted file mode 100644 index f75469f5..00000000 --- a/core/connection.py +++ /dev/null @@ -1,5 +0,0 @@ -from mongoengine import connect - -from core.transform_config import DB_HOST, DB_PORT, DB_NAME - -state_storage = connect(host=DB_HOST, port=DB_PORT, db=DB_NAME) diff --git a/tests/schema_test.py b/tests/schema_test.py deleted file mode 100644 index d89ac1a4..00000000 --- a/tests/schema_test.py +++ /dev/null @@ -1,55 +0,0 @@ -from datetime import datetime -import uuid - -from core.state_schema import Human, Bot, Utterance, BotUtterance, Dialog, HumanUtterance -from core.connection import state_storage -from core.bot import BOT - -########################### Test case ####################################### - -# User.drop_collection() - - -state = {'version': '0.10.1', 'dialogs': []} -for d in Dialog.objects: - state['dialogs'].append(d.to_dict()) - -print(state) - -Human.drop_collection() - -Dialog.objects.delete() -Utterance.objects.delete() -BotUtterance.objects.delete() -HumanUtterance.objects.delete() -# User.objects.delete() -Human.objects.delete() - - -h_user = Human(user_telegram_id=str(uuid.uuid4())) - -h_utt_1 = Utterance(text='Привет!', user=h_user, date_time=datetime.utcnow()) -b_utt_1 = BotUtterance(text='Привет, я бот!', user=BOT, active_skill='chitchat', - confidence=0.85, date_time=datetime.utcnow()) - -h_utt_2 = Utterance(text='Как дела?', user=h_user, date_time=datetime.utcnow()) -b_utt_2 = BotUtterance(text='Хорошо, а у тебя как?', user=BOT, - active_skill='chitchat', - confidence=0.9333, date_time=datetime.utcnow()) - -h_utt_3 = Utterance(text='И у меня нормально. 
Когда родился Петр Первый?', user=h_user, date_time=datetime.utcnow()) -b_utt_3 = BotUtterance(text='в 1672 году', user=BOT, active_skill='odqa', confidence=0.74, - date_time=datetime.utcnow()) -print(b_utt_3.to_dict()) - - -# for d in Dialog.objects: -# print(d.to_dict()) - -state = {'version': '0.10.1', 'dialogs': []} -for d in Dialog.objects: - state['dialogs'].append(d.to_dict()) - -print(state) - - From 3b647b490ea5b6ec92a651b76e17a3a7c2ea40b8 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 24 Sep 2019 17:35:43 +0300 Subject: [PATCH 030/133] style: minor style fixes, logger fixes, removed connection.py --- .flake8 | 2 +- core/agent.py | 34 ++++++++++++++++----------- core/config_parser.py | 34 ++++++++++++++++----------- core/connectors.py | 2 +- core/run.py | 7 +++--- core/state_manager.py | 35 ++++++++++++++++++---------- core/state_schema.py | 16 ++++++++----- dev_requirements.txt | 3 ++- docs/source/conf.py | 2 +- state_formatters/dp_formatters.py | 4 +++- tests/dummy_connectors_test_setup.py | 8 +++---- utils/http_api_stress_test.py | 5 ++-- utils/http_api_test.py | 8 +++---- 13 files changed, 94 insertions(+), 66 deletions(-) diff --git a/.flake8 b/.flake8 index 839f398f..43ca0dd7 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length=120 -ignore=D100,D101,D102,D103,D107 +ignore=D100,D101,D102,D103,D107,F403,F405 exclude=.git,__pycache__,build,dist diff --git a/core/agent.py b/core/agent.py index b7954df4..6cc88568 100644 --- a/core/agent.py +++ b/core/agent.py @@ -1,7 +1,6 @@ import asyncio from collections import defaultdict -from datetime import datetime from time import time from typing import Any, Optional, Callable, Hashable @@ -42,7 +41,7 @@ def flush_record(self, dialog_id: str): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') if self.response_logger_callable: - self.response_logger_callable(dialog_id, self.workflow) + self.response_logger_callable(self.workflow[dialog_id]) return self.workflow.pop(dialog_id) def register_service_request(self, dialog_id: str, service_name): @@ -68,7 +67,12 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res # Updating workflow with service response service = self.pipeline.get_service_by_name(service_name) if service: - self.workflow[dialog_id]['services'][service_name]['done'] = time() + service_data = self.workflow[dialog_id]['services'][service_name] + service_data['done'] = time() + try: + service_data['send'] + except KeyError: + service_data['send'] = None if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], @@ -78,7 +82,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res done, waiting = self.get_services_status(dialog_id) next_services = self.pipeline.get_next_services(done, waiting) - # Processing the case, when service is skill selector + # Processing the case, when service is a skill selector if service and service.is_selector(): selected_services = list(response.values())[0] result = [] @@ -88,16 +92,16 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res else: result.append(service) next_services = result + # send dialog workflow record to further logging operations: if self.process_logger_callable: - self.process_logger_callable(self.workflow['dialog_id']) # send dialog workflow record to further logging operations - + 
self.process_logger_callable(self.workflow['dialog_id']) + return next_services async def register_msg(self, utterance: str, user_telegram_id: Hashable, - user_device_type: Any, - date_time: datetime, location=Any, + user_device_type: Any, location=Any, channel_type=str, deadline_timestamp=None, - require_response=False, should_reset=False, **kwargs): + require_response=False, **kwargs): hold_flush = False user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False @@ -105,15 +109,17 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, if require_response: event = asyncio.Event() kwargs['event'] = event - hold_flush = True - self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, - hold_flush=hold_flush, **kwargs) - await self.process(str(dialog.id), 'input', utterance) - if require_response: + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, + hold_flush=True, **kwargs) + await self.process(str(dialog.id), 'input', utterance) await event.wait() workflow_record = self.get_workflow_record(str(dialog.id)) self.flush_record(str(dialog.id)) return workflow_record + else: + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, + hold_flush=hold_flush, **kwargs) + await self.process(str(dialog.id), 'input', utterance) async def process(self, dialog_id, service_name=None, response=None): workflow_record = self.get_workflow_record(dialog_id) diff --git a/core/config_parser.py b/core/config_parser.py index 85924aeb..938d4835 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -1,8 +1,10 @@ import aiohttp import asyncio -from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, RESPONSE_SELECTORS, POSTPROCESSORS -from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, QueueListenerBatchifyer +from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS,\ + RESPONSE_SELECTORS, POSTPROCESSORS +from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \ + QueueListenerBatchifyer from core.pipeline import Service, simple_workflow_formatter from core.state_manager import StateManager @@ -12,8 +14,9 @@ def parse_old_config(): worker_tasks = [] session = aiohttp.ClientSession() - def make_service_from_config_rec(conf_record, session, state_processor_method, tags, names_previous_services, name_modifier=None): - worker_tasks = [] + def make_service_from_config_rec(conf_record, sess, state_processor_method, tags, names_previous_services, + name_modifier=None): + _worker_tasks = [] if name_modifier: name = name_modifier(conf_record['name']) else: @@ -22,9 +25,11 @@ def make_service_from_config_rec(conf_record, session, state_processor_method, t batch_size = conf_record.get('batch_size', 1) url = conf_record['url'] + connector_func = None + if conf_record['protocol'] == 'http': if batch_size == 1 and isinstance(url, str): - connector_func = HTTPConnector(session, url, formatter, conf_record['name']).send + connector_func = HTTPConnector(sess, url, formatter, conf_record['name']).send else: queue = asyncio.Queue() connector_func = AioQueueConnector(queue).send # worker task and queue connector @@ -33,13 +38,15 @@ def make_service_from_config_rec(conf_record, session, state_processor_method, t else: urls = url for u in urls: - 
worker_tasks.append(QueueListenerBatchifyer(session, u, formatter, - name, queue, batch_size)) - - service = Service(name, connector_func, state_processor_method, batch_size, - tags, names_previous_services, simple_workflow_formatter) + _worker_tasks.append(QueueListenerBatchifyer(sess, u, formatter, + name, queue, batch_size)) + if connector_func is None: + raise ValueError(f'No connector function is defined while making a service {name}.') + + _service = Service(name, connector_func, state_processor_method, batch_size, + tags, names_previous_services, simple_workflow_formatter) - return service, worker_tasks + return _service, _worker_tasks def add_bot_to_name(name): return f'bot_{name}' @@ -64,7 +71,7 @@ def add_bot_to_name(name): if ANNOTATORS_3: for anno in ANNOTATORS_3: service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['ANNOTATORS_3'], previous_services) + ['ANNOTATORS_3'], previous_services) services.append(service) worker_tasks.extend(workers) @@ -129,7 +136,8 @@ def add_bot_to_name(name): previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} for anno in ANNOTATORS_3: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, ['POST_ANNOTATORS_3'], + service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, + ['POST_ANNOTATORS_3'], previous_services, add_bot_to_name) services.append(service) worker_tasks.extend(workers) diff --git a/core/connectors.py b/core/connectors.py index 30afbe0a..98f5a8fa 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -1,6 +1,6 @@ import asyncio import aiohttp -from typing import Dict, List, Callable +from typing import Dict, Callable class HTTPConnector: diff --git a/core/run.py b/core/run.py index 2b03e6e6..d4e2fa33 100644 --- a/core/run.py +++ b/core/run.py @@ -12,7 +12,6 @@ from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager -from core.transform_config import DEBUG logger = logging.getLogger('service_logger') logger.setLevel(logging.INFO) @@ -31,8 +30,8 @@ CHANNEL = args.channel -def response_logger(dialog_id, workflow): - for service_name, service_data in workflow[dialog_id]['services'].items(): +def response_logger(workflow_record): + for service_name, service_data in workflow_record['services'].items(): done = service_data['done'] send = service_data['send'] if send is None or done is None: @@ -58,7 +57,7 @@ async def run(register_msg): msg = input(f'You ({user_id}): ').strip() if msg: response = await register_msg(utterance=msg, user_telegram_id=user_id, user_device_type='cmd', - date_time=datetime.now(), location='lab', channel_type=CHANNEL, + location='lab', channel_type=CHANNEL, deadline_timestamp=None, require_response=True) print('Bot: ', response['dialog']['utterances'][-1]['text']) diff --git a/core/state_manager.py b/core/state_manager.py index daf465b9..e548a692 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -1,15 +1,20 @@ from datetime import datetime from typing import Hashable, Any, Optional, Dict, TypeVar, List -from copy import deepcopy -from core.state_schema import User, Human, Bot, HumanUtterance, BotUtterance, Dialog, HUMAN_UTTERANCE_SCHEMA, BOT_UTTERANCE_SCHEMA, BOT_SCHEMA, HUMAN_SCHEMA, DIALOG_SCHEMA -from core.connection import connect +from mongoengine import connect + +from core.state_schema import User, Human, Bot, HumanUtterance, 
BotUtterance, Dialog, HUMAN_UTTERANCE_SCHEMA,\ + BOT_UTTERANCE_SCHEMA +from core.transform_config import DB_HOST, DB_PORT, DB_NAME + userT = TypeVar('userT', bound=User) class StateManager: + state_storage = connect(host=DB_HOST, port=DB_PORT, db=DB_NAME) + @staticmethod def create_new_dialog(human, bot, location=None, channel_type=None): dialog = Dialog(human=human, bot=bot, location=location or Dialog.location.default, @@ -35,7 +40,8 @@ def create_new_bot(persona: Optional[List[str]] = None): return bot @staticmethod - def create_new_human_utterance(text, user: Human, date_time, annotations=None, selected_skills=None): + def create_new_human_utterance(text, user: Human, date_time, annotations=None, + selected_skills=None): utt = HumanUtterance(text=text, user=user.to_dict(), date_time=date_time, @@ -45,7 +51,8 @@ def create_new_human_utterance(text, user: Human, date_time, annotations=None, s return utt @staticmethod - def create_new_bot_utterance(orig_text, text, user, date_time, active_skill, confidence, annotations=None): + def create_new_bot_utterance(orig_text, text, user, date_time, active_skill, confidence, + annotations=None): utt = BotUtterance(orig_text=orig_text, text=text, user=user, @@ -86,21 +93,24 @@ def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): def add_human_utterance(cls, dialog: Dialog, user: Human, text: str, date_time: datetime, annotation: Optional[dict] = None, selected_skill: Optional[dict] = None) -> None: - utterance = cls.create_new_human_utterance(text, user, date_time, annotation, selected_skill) + utterance = cls.create_new_human_utterance(text, user, date_time, annotation, + selected_skill) dialog.utterances.append(utterance) dialog.save() @classmethod def add_bot_utterance(cls, dialog: Dialog, orig_text: str, date_time: datetime, active_skill: str, - confidence: float, text: str = None, annotation: Optional[dict] = None) -> None: + confidence: float, text: str = None, + annotation: Optional[dict] = None) -> None: if not text: text = orig_text try: bot = dialog.utterances[-2].user except IndexError: bot = cls.create_new_bot() - utterance = cls.create_new_bot_utterance(orig_text, text, bot, date_time, active_skill, confidence, + utterance = cls.create_new_bot_utterance(orig_text, text, bot, date_time, active_skill, + confidence, annotation) dialog.utterances.append(utterance) dialog.save() @@ -170,7 +180,8 @@ def update_bot(bot: Bot, active_skill: Dict): bot.save() @classmethod - def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: + def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, + **kwargs) -> None: utterance = HUMAN_UTTERANCE_SCHEMA utterance['text'] = payload utterance['date_time'] = str(datetime.now()) @@ -188,7 +199,6 @@ def update_human_dict(human: Dict, active_skill: Dict): else: human['attributes'][attr_name] = attr_value - @staticmethod def update_bot_dict(bot: Dict, active_skill: Dict): attributes = active_skill.get('bot_attributes', {}) @@ -199,7 +209,8 @@ def update_bot_dict(bot: Dict, active_skill: Dict): bot['attributes'][attr_name] = attr_value @classmethod - def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: + def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, + **kwargs) -> None: active_skill_name = list(payload.values())[0] active_skill = dialog['utterances'][-1]['selected_skills'].get(active_skill_name, 
None) if not active_skill: @@ -250,4 +261,4 @@ def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): dialog_object.human.save() dialog_object.bot.save() - dialog_object.save() \ No newline at end of file + dialog_object.save() diff --git a/core/state_schema.py b/core/state_schema.py index dffd5579..8972f2f8 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -1,10 +1,7 @@ -import uuid - -from mongoengine import DynamicDocument, ReferenceField, ListField, StringField, DynamicField, \ - DateTimeField, FloatField, DictField, ObjectIdField +from mongoengine import DynamicDocument, ReferenceField, ListField, StringField, DynamicField, DateTimeField,\ + FloatField, DictField from . import STATE_API_VERSION -from datetime import datetime HUMAN_UTTERANCE_SCHEMA = { 'id': None, @@ -61,6 +58,7 @@ 'bot': None } + class User(DynamicDocument): persona = ListField(default=[]) attributes = DictField() @@ -70,6 +68,9 @@ class User(DynamicDocument): def to_dict(self): raise NotImplementedError + def update_from_dict(self, *args, **kwargs): + raise NotImplementedError + class Bot(User): @@ -108,7 +109,7 @@ def to_dict(self): 'profile': self.profile, 'attributes': self.attributes } - + def update_from_dict(self, payload): self.device_type = payload['device_type'] self.persona = payload['persona'] @@ -127,6 +128,9 @@ class Utterance(DynamicDocument): def to_dict(self): raise NotImplementedError + def make_from_dict(self, *args, **kwargs): + raise NotImplementedError + class HumanUtterance(Utterance): selected_skills = DynamicField(default=[]) diff --git a/dev_requirements.txt b/dev_requirements.txt index 5690a6d1..23d0547c 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,5 @@ sphinx==2.1.2 recommonmark==0.5.0 sphinx_rtd_theme -Pygments==2.4.2 \ No newline at end of file +Pygments==2.4.2 +flake8 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 9822b882..f2cf0512 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -193,4 +193,4 @@ # -- Options for todo extension ---------------------------------------------- # If true, `todo` and `todoList` produce output, else they produce nothing. 
-todo_include_todos = True \ No newline at end of file +todo_include_todos = True diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 6cf35c94..2db6c205 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -105,7 +105,9 @@ def odqa_formatter(payload: Any, model_args_names=('question_raw',), mode='in'): return last_utterances(payload, model_args_names) elif mode == 'out': return {"text": payload[0], - "confidence": 0.5} + "confidence": 0.5, + "persona": ["Я котик."], + "ololo": "ololo"} def chitchat_formatter(payload: Any, model_args_names=('q',), mode='in'): diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index b3fa225e..556b6827 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -2,14 +2,11 @@ import argparse import uuid - from aiohttp import web from datetime import datetime -from string import hexdigits from random import choice -from core.agent import Agent -from core.pipeline import Pipeline, Service +from core.pipeline import Service from core.connectors import HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager @@ -107,7 +104,8 @@ def main(): if s.is_selector(): s.connector_func = DummySelectorConnector(['chitchat', 'odqa'], 0.01, s.name).send else: - s.connector_func = DummyConnector(['we have a phrase', 'and another one', 'not so short one'], 0.01, s.name).send + s.connector_func = DummyConnector(['we have a phrase', 'and another one', 'not so short one'], 0.01, + s.name).send intermediate_storage = {} endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, StateManager.save_dialog_dict, 1, ['responder']) diff --git a/utils/http_api_stress_test.py b/utils/http_api_stress_test.py index d680c051..3275eee0 100644 --- a/utils/http_api_stress_test.py +++ b/utils/http_api_stress_test.py @@ -1,10 +1,7 @@ import aiohttp import asyncio import argparse -import csv -import json from time import time -from random import random, randrange import uuid from statistics import mean, median @@ -24,6 +21,7 @@ except Exception as e: raise e + async def perform_test_dialogue(session, url, uuid, payloads): times = [] for i in payloads: @@ -39,6 +37,7 @@ async def perform_test_dialogue(session, url, uuid, payloads): return times + async def run_users(url, payload, mnu, mxu): async with aiohttp.ClientSession() as session: for i in range(mnu, mxu + 1): diff --git a/utils/http_api_test.py b/utils/http_api_test.py index 26a8b2d3..8504377a 100644 --- a/utils/http_api_test.py +++ b/utils/http_api_test.py @@ -4,7 +4,7 @@ import csv import json from time import time -from random import random, randrange +from random import randrange import uuid ''' @@ -18,7 +18,6 @@ structure of phrase file (-pf) simple text file. 
One phrase per line ''' - parser = argparse.ArgumentParser() parser.add_argument('-u', '--url', type=str) parser.add_argument('-uc', '--usercount', help='count of test users, which will send the message', @@ -29,7 +28,6 @@ parser.add_argument('-df', '--dialogfile', help='name of the file with predefined dialogs', type=str, default="") parser.add_argument('-of', '--outputfile', help='name of the output file', type=str, default='output.csv') - args = parser.parse_args() payloads = {} @@ -45,7 +43,8 @@ phrases = [line.rstrip('\n') for line in file] except Exception as e: raise e - payloads = {uuid.uuid4().hex: [phrases[randrange(len(phrases))] for i in range(args.phrasecount)] for i in range(args.usercount)} + payloads = {uuid.uuid4().hex: [phrases[randrange(len(phrases))] for j in range(args.phrasecount)] for i in + range(args.usercount)} else: raise ValueError('You should provide either predefined dialog (-df) or file with phrases (-pf)') @@ -80,6 +79,7 @@ async def run(url, payloads, out_filename): for row in result: writer.writerow(row) + if __name__ == '__main__': loop = asyncio.get_event_loop() future = asyncio.ensure_future(run(args.url, payloads, args.outputfile)) From 1ba1e3b165fc387a7786cef5b94857b50326bfb3 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 24 Sep 2019 17:47:28 +0300 Subject: [PATCH 031/133] fix: remove 'type' form utterance level, fix odqa formatter --- core/state_manager.py | 6 +++--- core/state_schema.py | 8 ++------ state_formatters/dp_formatters.py | 4 +--- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/core/state_manager.py b/core/state_manager.py index e548a692..b61e9431 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -244,12 +244,12 @@ def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): utt_objects = [] for utt in dialog['utterances'][::-1]: if not utt['id']: - if utt['type'] == 'human': + if utt['user']['user_type'] == 'human': utt_objects.append(HumanUtterance.make_from_dict(utt)) - elif utt['type'] == 'bot': + elif utt['user']['user_type'] == 'bot': utt_objects.append(BotUtterance.make_from_dict(utt)) else: - raise ValueError('utterance of unknown type') + raise ValueError('unknown user type in the utterance') else: break for utt in utt_objects[::-1]: diff --git a/core/state_schema.py b/core/state_schema.py index 8972f2f8..9c185d72 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -10,7 +10,6 @@ 'annotations': {}, 'date_time': None, 'selected_skills': {}, - 'type': 'human' } BOT_UTTERANCE_SCHEMA = { @@ -22,7 +21,6 @@ 'user': {}, 'annotations': {}, 'date_time': None, - 'type': 'bot' } BOT_SCHEMA = { @@ -142,8 +140,7 @@ def to_dict(self): 'user': self.user, 'annotations': self.annotations, 'date_time': str(self.date_time), - 'selected_skills': self.selected_skills, - 'type': 'human' + 'selected_skills': self.selected_skills } @classmethod @@ -173,8 +170,7 @@ def to_dict(self): 'orig_text': self.orig_text, 'user': self.user, 'annotations': self.annotations, - 'date_time': str(self.date_time), - 'type': 'bot' + 'date_time': str(self.date_time) } @classmethod diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 2db6c205..6cf35c94 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -105,9 +105,7 @@ def odqa_formatter(payload: Any, model_args_names=('question_raw',), mode='in'): return last_utterances(payload, model_args_names) elif mode == 'out': return {"text": payload[0], - "confidence": 0.5, - "persona": ["Я 
котик."], - "ololo": "ololo"} + "confidence": 0.5} def chitchat_formatter(payload: Any, model_args_names=('q',), mode='in'): From 0ef4116f5531a6fc3f1f172af548d70de9e9a27a Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Wed, 25 Sep 2019 13:15:17 +0300 Subject: [PATCH 032/133] refactor: rename is_selector() -> is_sselector() fix: formatters feat: change rselectror api doc: updated docs on services output api --- config.py | 2 +- core/agent.py | 2 +- core/pipeline.py | 2 +- core/state_manager.py | 9 ++++++--- docs/source/api/services_http_api.rst | 17 +++++++++++++++-- state_formatters/dp_formatters.py | 12 +++++------- tests/dummy_connectors_test_setup.py | 2 +- 7 files changed, 30 insertions(+), 16 deletions(-) diff --git a/config.py b/config.py index 74102fd6..5ad25f93 100644 --- a/config.py +++ b/config.py @@ -35,7 +35,7 @@ }, "profile_handler": True, "dockerfile": "dockerfile_skill_cpu", - "formatter": odqa_formatter + "formatter": chitchat_formatter } ] diff --git a/core/agent.py b/core/agent.py index 6cc88568..8e371ac3 100644 --- a/core/agent.py +++ b/core/agent.py @@ -83,7 +83,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res next_services = self.pipeline.get_next_services(done, waiting) # Processing the case, when service is a skill selector - if service and service.is_selector(): + if service and service.is_sselector(): selected_services = list(response.values())[0] result = [] for service in next_services: diff --git a/core/pipeline.py b/core/pipeline.py index e686849a..b3d662ac 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -15,7 +15,7 @@ def __init__(self, name, connector_func, state_processor_method=None, self.previous_services = set() self.next_services = set() - def is_selector(self): + def is_sselector(self): return 'selector' in self.tags def is_responder(self): diff --git a/core/state_manager.py b/core/state_manager.py index b61e9431..db712858 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -211,7 +211,10 @@ def update_bot_dict(bot: Dict, active_skill: Dict): @classmethod def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: - active_skill_name = list(payload.values())[0] + rselector_data = list(payload.values())[0] + active_skill_name = rselector_data['skill_name'] + new_text = rselector_data['text'] + new_confidence = rselector_data['confidence'] active_skill = dialog['utterances'][-1]['selected_skills'].get(active_skill_name, None) if not active_skill: raise ValueError(f'provided {payload} is not valid') @@ -219,11 +222,11 @@ def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payl cls.update_bot_dict(dialog['bot'], active_skill) utterance = BOT_UTTERANCE_SCHEMA - utterance['text'] = active_skill['text'] + utterance['text'] = new_text utterance['orig_text'] = active_skill['text'] utterance['date_time'] = str(datetime.now()) utterance['active_skill'] = active_skill_name - utterance['confidence'] = active_skill['confidence'] + utterance['confidence'] = new_confidence utterance['user'] = dialog['bot'] dialog['utterances'].append(utterance) diff --git a/docs/source/api/services_http_api.rst b/docs/source/api/services_http_api.rst index e0586ffa..2d2c11e2 100644 --- a/docs/source/api/services_http_api.rst +++ b/docs/source/api/services_http_api.rst @@ -80,15 +80,27 @@ But it's possible to extend it with ``human_attributes`` and ``bot_attributes`` {"text": "hello", "confidence": 0.33, "human_attributes": {"name": "Vasily"}, 
"bot_attributes": {"persona": ["I like swimming.", "I have a nice swimming suit."]}} +Everything sent to ``human_attributes`` and ``bot_attributes`` keys will update `user` field in the same +utterance for the human and in the next utterance for the bot. Please refer to user_state_api_ to find more +information about the **User** object updates. + +Also it's possible for a skill to send any additional key to the state: + + .. code:: json + + {"text": "hello", "confidence": 0.33, "any_key": "any_value"} + + Response Selector ================= Unlike Skill Selector, Response Selector should select a *single* skill responsible for generation of the -final response shown to the user. The expected result is a name of the selected skill: +final response shown to the user. The expected result is a name of the selected skill, text (may be +overwritten from the original skill response) and confidence (also may be overwritten): .. code:: json - "chitchat" + {"skill_name": "chitchat", "text": "Hello, Joe!", "confidence": 0.3} Postprocessor ============= @@ -108,4 +120,5 @@ utterance shown to the user, and the original skill answer will go to the ``orig .. _state: https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html .. _config file: https://github.com/deepmipt/dp-agent/blob/master/config.py .. _formatters: https://github.com/deepmipt/dp-agent/blob/master/state_formatters/dp_formatters.py +.. _user_state_api: https://deeppavlov-agent.readthedocs.io/en/latest/api/user_state_api.html diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 6cf35c94..90e7ff53 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -90,13 +90,11 @@ def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) elif mode == 'out': - response = [] - for el in payload: - class_name = el[0] - if class_name in ['speech', 'negative']: - response.append('chitchat') - else: - response.append('odqa') + class_name = payload[0] + if class_name in ['speech', 'negative']: + response = ['chitchat'] + else: + response = ['odqa'] return response diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index 556b6827..94c92220 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -101,7 +101,7 @@ def main(): for s in services: if 'RESPONSE_SELECTORS' in s.tags: continue - if s.is_selector(): + if s.is_sselector(): s.connector_func = DummySelectorConnector(['chitchat', 'odqa'], 0.01, s.name).send else: s.connector_func = DummyConnector(['we have a phrase', 'and another one', 'not so short one'], 0.01, From efb6984565285d3bdc38e86b85a6896bae777e83 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Wed, 25 Sep 2019 15:48:51 +0300 Subject: [PATCH 033/133] refactor: change confidence rs api, remove legacy rs --- core/connectors.py | 6 ++++-- models/response_selector.py | 14 -------------- 2 files changed, 4 insertions(+), 16 deletions(-) delete mode 100644 models/response_selector.py diff --git a/core/connectors.py b/core/connectors.py index 98f5a8fa..fbb944d4 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -54,8 +54,10 @@ async def call_service(self, process_callable): class ConfidenceResponseSelectorConnector: async def send(self, payload: Dict): response = payload['utterances'][-1]['selected_skills'] - skill_name = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0][0] - 
return {'confidence_response_selector': skill_name} + best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] + return {'confidence_response_selector': {'skill_name': best_skill[0], + 'text': best_skill[1]['text'], + 'confidence': best_skill[1]['confidence']}} class HttpOutputConnector: diff --git a/models/response_selector.py b/models/response_selector.py deleted file mode 100644 index e302d338..00000000 --- a/models/response_selector.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Dict, List - - -class ConfidenceResponseSelector: - """Select a single response for each dialog turn. - """ - - def __call__(self, state: Dict) -> List[Dict[str, List[str]]]: - skill_names = [] - responses = [d['utterances'][-1]['selected_skills'] for d in state['dialogs']] - for r in responses: - sr = sorted(r.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] - skill_names.append(sr[0]) - return [{'confidence_response_selector': sn} for sn in skill_names] From c23fc4fd9593046eb8c504776a5f434ca5d4247a Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 27 Sep 2019 14:34:21 +0300 Subject: [PATCH 034/133] feat: update mongo image, take db vars from environment --- config.py | 10 +++++----- docker-compose.yml | 2 +- generate_composefile.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index 88d890c5..3f29d390 100644 --- a/config.py +++ b/config.py @@ -1,11 +1,11 @@ -from functools import partial +from os import getenv from state_formatters.dp_formatters import * -DB_NAME = 'test' -DB_HOST = '127.0.0.1' -DB_PORT = 27017 -DB_PATH = '/data/db' +DB_NAME = getenv('DB_NAME', 'test') +DB_HOST = getenv('DB_HOST', '127.0.0.1') +DB_PORT = getenv('DB_PORT', 27017) +DB_PATH = getenv('DB_PATH', '/data/db') MAX_WORKERS = 4 diff --git a/docker-compose.yml b/docker-compose.yml index ab261150..e4f33a67 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,7 +57,7 @@ services: - ${EXTERNAL_FOLDER}/.deeppavlov:/root/.deeppavlov mongo: command: mongod - image: mongo:3.2.0 + image: mongo:4.0.0 ports: - 27017:27017 volumes: diff --git a/generate_composefile.py b/generate_composefile.py index 0f503aa1..011014de 100644 --- a/generate_composefile.py +++ b/generate_composefile.py @@ -24,7 +24,7 @@ MONGO_BASIC = { 'mongo': {'command': 'mongod', - 'image': 'mongo:3.2.0', + 'image': 'mongo:4.0.0', 'ports': ['{}:27017'], # map port to none standard port, to avoid conflicts with locally installed mongodb. 
'volumes': ['/var/run/docker.sock:/var/run/docker.sock', f'{DB_PATH}:/root/data/db']} From f23a01e67c89d41c1d59179631e9112eefe565c6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 27 Sep 2019 20:34:21 +0300 Subject: [PATCH 035/133] feat: service logger --- core/agent.py | 43 ++++++++++++++++++++++++++----------------- core/connectors.py | 10 +++++++--- core/run.py | 6 +++--- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/core/agent.py b/core/agent.py index 8e371ac3..c6041dad 100644 --- a/core/agent.py +++ b/core/agent.py @@ -47,32 +47,34 @@ def flush_record(self, dialog_id: str): def register_service_request(self, dialog_id: str, service_name): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - self.workflow[dialog_id]['services'][service_name] = {'send': time(), 'done': None} + self.workflow[dialog_id]['services'][service_name] = {'send': True, 'done': False, 'send_time': time(), + 'done_time': None} def get_services_status(self, dialog_id: str): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') done, waiting = set(), set() for key, value in self.workflow[dialog_id]['services'].items(): - if value['done'] is not None: + if value['done']: done.add(key) else: waiting.add(key) return done, waiting - def process_service_response(self, dialog_id: str, service_name: str = None, response: str = None): + def process_service_response(self, dialog_id: str, service_name: str = None, response: Any = None, + response_time: float = None): workflow_record = self.get_workflow_record(dialog_id) # Updating workflow with service response service = self.pipeline.get_service_by_name(service_name) if service: service_data = self.workflow[dialog_id]['services'][service_name] - service_data['done'] = time() - try: - service_data['send'] - except KeyError: - service_data['send'] = None + service_data['done'] = True + service_data['done_time'] = response_time + if service_name == 'input': + service_data['send'] = True + service_data['send_time'] = service_data['done_time'] if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], @@ -88,7 +90,8 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res result = [] for service in next_services: if service.name not in selected_services: - self.workflow[dialog_id]['services'][service.name] = {'done': time(), 'send': None} + self.workflow[dialog_id]['services'][service.name] = {'done': True, 'send': False, + 'send_time': None, 'done_time': None} else: result.append(service) next_services = result @@ -111,7 +114,7 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, kwargs['event'] = event self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=True, **kwargs) - await self.process(str(dialog.id), 'input', utterance) + await self.process(str(dialog.id), 'input', utterance, time()) await event.wait() workflow_record = self.get_workflow_record(str(dialog.id)) self.flush_record(str(dialog.id)) @@ -119,11 +122,11 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=hold_flush, **kwargs) - await self.process(str(dialog.id), 'input', utterance) + await self.process(str(dialog.id), 'input', utterance, time()) - async def 
process(self, dialog_id, service_name=None, response=None): + async def process(self, dialog_id, service_name=None, response: Any = None, response_time: float = None): workflow_record = self.get_workflow_record(dialog_id) - next_services = self.process_service_response(dialog_id, service_name, response) + next_services = self.process_service_response(dialog_id, service_name, response, response_time) service_requests = [] has_responder = [] @@ -138,10 +141,16 @@ async def process(self, dialog_id, service_name=None, response=None): tasks = [] for service, response in zip(next_services, responses): - if response is not None: - if isinstance(response, Exception): - raise response - tasks.append(self.process(dialog_id, service.name, response)) + if response is None: + r = [None] + rt = time() + else: + r = response[0] + rt = response[1] + if r is not None: + if isinstance(r, Exception): + raise r + tasks.append(self.process(dialog_id, service.name, r, rt)) await asyncio.gather(*tasks) if has_responder: # TODO(Pugin): this part breaks some processing logic on the end diff --git a/core/connectors.py b/core/connectors.py index fbb944d4..c3677429 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -1,5 +1,6 @@ import asyncio import aiohttp +import time from typing import Dict, Callable @@ -13,7 +14,8 @@ def __init__(self, session: aiohttp.ClientSession, url: str, formatter: Callable async def send(self, payload: Dict): async with self.session.post(self.url, json=self.formatter([payload])) as resp: response = await resp.json() - return {self.service_name: self.formatter(response[0], mode='out')} + response_time = time.time() + return {self.service_name: self.formatter(response[0], mode='out')}, response_time class AioQueueConnector: @@ -44,9 +46,11 @@ async def call_service(self, process_callable): tasks = [] async with self.session.post(self.url, json=self.formatter(batch)) as resp: response = await resp.json() + response_time = time.time() for dialog, response_text in zip(batch, response): tasks.append(process_callable(dialog['id'], self.service_name, - {self.service_name: self.formatter(response_text, mode='out')})) + {self.service_name: self.formatter(response_text, mode='out')}, + response_time)) await asyncio.gather(*tasks) await asyncio.sleep(0.1) @@ -57,7 +61,7 @@ async def send(self, payload: Dict): best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] return {'confidence_response_selector': {'skill_name': best_skill[0], 'text': best_skill[1]['text'], - 'confidence': best_skill[1]['confidence']}} + 'confidence': best_skill[1]['confidence']}}, time.time() class HttpOutputConnector: diff --git a/core/run.py b/core/run.py index d4e2fa33..815f8f10 100644 --- a/core/run.py +++ b/core/run.py @@ -32,9 +32,9 @@ def response_logger(workflow_record): for service_name, service_data in workflow_record['services'].items(): - done = service_data['done'] - send = service_data['send'] - if send is None or done is None: + done = service_data['done_time'] + send = service_data['send_time'] + if not send or not done: continue logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') From d706c6309c3ac1c1b9ac3e9db39109cf7e8c898f Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 27 Sep 2019 21:41:15 +0300 Subject: [PATCH 036/133] fix: schema copy --- core/state_manager.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/state_manager.py b/core/state_manager.py index db712858..7efc691f 100644 --- a/core/state_manager.py +++ 
b/core/state_manager.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Hashable, Any, Optional, Dict, TypeVar, List +from copy import deepcopy from mongoengine import connect @@ -182,7 +183,7 @@ def update_bot(bot: Bot, active_skill: Dict): @classmethod def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: - utterance = HUMAN_UTTERANCE_SCHEMA + utterance = deepcopy(HUMAN_UTTERANCE_SCHEMA) utterance['text'] = payload utterance['date_time'] = str(datetime.now()) utterance['user'] = dialog['human'] @@ -221,7 +222,7 @@ def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payl cls.update_human_dict(dialog['human'], active_skill) cls.update_bot_dict(dialog['bot'], active_skill) - utterance = BOT_UTTERANCE_SCHEMA + utterance = deepcopy(BOT_UTTERANCE_SCHEMA) utterance['text'] = new_text utterance['orig_text'] = active_skill['text'] utterance['date_time'] = str(datetime.now()) From 1d31c98f6eab47d888ac5fa609840a49b47079b8 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Sun, 29 Sep 2019 21:55:13 +0300 Subject: [PATCH 037/133] slightly reworked process and connectors architecture --- core/agent.py | 48 +++++++--------------------- core/config_parser.py | 11 +++++-- core/connectors.py | 43 ++++++++++++++++++------- core/run.py | 4 +-- tests/dummy_connectors_test_setup.py | 19 ++++++++--- 5 files changed, 68 insertions(+), 57 deletions(-) diff --git a/core/agent.py b/core/agent.py index c6041dad..2d7134a0 100644 --- a/core/agent.py +++ b/core/agent.py @@ -72,13 +72,14 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res service_data = self.workflow[dialog_id]['services'][service_name] service_data['done'] = True service_data['done_time'] = response_time - if service_name == 'input': - service_data['send'] = True - service_data['send_time'] = service_data['done_time'] if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], payload=response) + # Flush record and return zero next services if service is is_responder + if service.is_responder(): + self.flush_record(dialog_id) + return [] # Calculating next steps done, waiting = self.get_services_status(dialog_id) @@ -105,23 +106,19 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, location=Any, channel_type=str, deadline_timestamp=None, require_response=False, **kwargs): - hold_flush = False user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False dialog = self.state_manager.get_or_create_dialog(user, location, channel_type, should_reset=should_reset) if require_response: event = asyncio.Event() kwargs['event'] = event - self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, - hold_flush=True, **kwargs) + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) + self.register_service_request(str(dialog.id), 'input') await self.process(str(dialog.id), 'input', utterance, time()) await event.wait() - workflow_record = self.get_workflow_record(str(dialog.id)) - self.flush_record(str(dialog.id)) - return workflow_record + return self.get_workflow_record(str(dialog.id)) else: - self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, - hold_flush=hold_flush, **kwargs) + self.add_workflow_record(dialog=dialog, 
deadline_timestamp=deadline_timestamp, **kwargs) await self.process(str(dialog.id), 'input', utterance, time()) async def process(self, dialog_id, service_name=None, response: Any = None, response_time: float = None): @@ -129,32 +126,11 @@ async def process(self, dialog_id, service_name=None, response: Any = None, resp next_services = self.process_service_response(dialog_id, service_name, response, response_time) service_requests = [] - has_responder = [] for service in next_services: self.register_service_request(dialog_id, service.name) payload = service.apply_workflow_formatter(workflow_record) - service_requests.append(service.connector_func(payload)) - if service.is_responder(): - has_responder.append(service) + service_requests.append( + service.connector_func(payload=payload, callback=self.process) + ) - responses = await asyncio.gather(*service_requests, return_exceptions=True) - - tasks = [] - for service, response in zip(next_services, responses): - if response is None: - r = [None] - rt = time() - else: - r = response[0] - rt = response[1] - if r is not None: - if isinstance(r, Exception): - raise r - tasks.append(self.process(dialog_id, service.name, r, rt)) - await asyncio.gather(*tasks) - - if has_responder: # TODO(Pugin): this part breaks some processing logic on the end - for i in has_responder: - i.state_processor_method(workflow_record['dialog'], workflow_record['dialog_object'], None) - if not workflow_record.get('hold_flush', False): - self.flush_record(dialog_id) + await asyncio.gather(*service_requests) diff --git a/core/config_parser.py b/core/config_parser.py index 938d4835..0b9557da 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -96,9 +96,14 @@ def add_bot_to_name(name): previous_services = {i.name for i in services if 'SKILLS' in i.tags} if not RESPONSE_SELECTORS: - services.append(Service('confidence_response_selector', ConfidenceResponseSelectorConnector().send, - StateManager.add_bot_utterance_simple_dict, - 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) + services.append( + Service( + 'confidence_response_selector', + ConfidenceResponseSelectorConnector('confidence_response_selector').send, + StateManager.add_bot_utterance_simple_dict, + 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter + ) + ) else: for r in RESPONSE_SELECTORS: service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple_dict, diff --git a/core/connectors.py b/core/connectors.py index c3677429..3eba52ba 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -11,18 +11,21 @@ def __init__(self, session: aiohttp.ClientSession, url: str, formatter: Callable self.formatter = formatter self.service_name = service_name - async def send(self, payload: Dict): + async def send(self, payload: Dict, callback: Callable): async with self.session.post(self.url, json=self.formatter([payload])) as resp: response = await resp.json() - response_time = time.time() - return {self.service_name: self.formatter(response[0], mode='out')}, response_time + await callback( + dialog_id=payload['id'], service_name=self.service_name, + response={self.service_name: self.formatter(response[0], mode='out')}, + respinse_time=time.time() + ) class AioQueueConnector: def __init__(self, queue): self.queue = queue - async def send(self, payload: Dict): + async def send(self, payload: Dict, **kwargs): await self.queue.put(payload) @@ -56,29 +59,47 @@ async def call_service(self, process_callable): class 
ConfidenceResponseSelectorConnector: - async def send(self, payload: Dict): + def __init__(self, service_name: str): + self.service_name = service_name + + async def send(self, payload: Dict, callback: Callable): response = payload['utterances'][-1]['selected_skills'] best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] - return {'confidence_response_selector': {'skill_name': best_skill[0], - 'text': best_skill[1]['text'], - 'confidence': best_skill[1]['confidence']}}, time.time() + await callback( + dialog_id=payload['id'], service_name=self.service_name, + response={ + 'confidence_response_selector': { + 'skill_name': best_skill[0], + 'text': best_skill[1]['text'], + 'confidence': best_skill[1]['confidence'] + } + }, + response_time=time.time()) class HttpOutputConnector: - def __init__(self, intermediate_storage: Dict): + def __init__(self, intermediate_storage: Dict, service_name: str): self.intermediate_storage = intermediate_storage + self.service_name = service_name - async def send(self, payload): + async def send(self, payload: Dict, callback: Callable): message_uuid = payload['message_uuid'] event = payload['event'] response_text = payload['dialog']['utterances'][-1]['text'] self.intermediate_storage[message_uuid] = response_text event.set() + await callback(payload['dialog']['id'], self.service_name, + None, time.time()) class EventSetOutputConnector: - async def send(self, payload): + def __init__(self, service_name: str): + self.service_name = service_name + + async def send(self, payload: Dict, callback: Callable): event = payload.get('event', None) if not event or not isinstance(event, asyncio.Event): raise ValueError("'event' key is not presented in payload") event.set() + await callback(payload['dialog']['id'], self.service_name, + None, time.time()) diff --git a/core/run.py b/core/run.py index 815f8f10..27e5124c 100644 --- a/core/run.py +++ b/core/run.py @@ -149,7 +149,7 @@ def main(): services, workers, session = parse_old_config() if CHANNEL == 'cmd_client': - endpoint = Service('cmd_responder', EventSetOutputConnector().send, + endpoint = Service('cmd_responder', EventSetOutputConnector('cmd_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) loop = asyncio.get_event_loop() @@ -172,7 +172,7 @@ def main(): logging.shutdown() elif CHANNEL == 'http_client': intermediate_storage = {} - endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage, 'http_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index 94c92220..53516e4f 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -1,6 +1,7 @@ import asyncio import argparse import uuid +import time from aiohttp import web from datetime import datetime @@ -24,9 +25,13 @@ def __init__(self, returns, sleeptime, service_name): self.sleeptime = sleeptime self.service_name = service_name - async def send(self, payload): + async def send(self, payload, callback): await asyncio.sleep(self.sleeptime) - return {self.service_name: {"text": 
choice(self.returns), "confidence": 0.5}} + await callback( + dialog_id=payload['id'], + service_name=self.service_name, + response={self.service_name: {"text": choice(self.returns), "confidence": 0.5}}, + response_time=time.time()) class DummySelectorConnector: @@ -35,9 +40,13 @@ def __init__(self, returns, sleeptime, service_name): self.sleeptime = sleeptime self.service_name = service_name - async def send(self, payload): + async def send(self, payload, callback): await asyncio.sleep(self.sleeptime) - return {self.service_name: self.returns} + await callback( + dialog_id=payload['id'], + service_name=self.service_name, + response={self.service_name: self.returns}, + response_time=time.time()) async def on_shutdown(app): @@ -107,7 +116,7 @@ def main(): s.connector_func = DummyConnector(['we have a phrase', 'and another one', 'not so short one'], 0.01, s.name).send intermediate_storage = {} - endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, + endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage, 'http_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) register_msg, process_callable = prepare_agent(services, endpoint, input_srv, False) From 39d6894dabaf545dcbfeba6eb09f72594a226549 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Mon, 30 Sep 2019 17:38:32 +0300 Subject: [PATCH 038/133] fix wrong service names in config parser --- core/agent.py | 8 +++++--- core/config_parser.py | 2 +- core/connectors.py | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/core/agent.py b/core/agent.py index 2d7134a0..9b36041f 100644 --- a/core/agent.py +++ b/core/agent.py @@ -78,7 +78,8 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res payload=response) # Flush record and return zero next services if service is is_responder if service.is_responder(): - self.flush_record(dialog_id) + if not workflow_record.get('hold_flush'): + self.flush_record(dialog_id) return [] # Calculating next steps @@ -112,11 +113,12 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, if require_response: event = asyncio.Event() kwargs['event'] = event - self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=True, **kwargs) self.register_service_request(str(dialog.id), 'input') await self.process(str(dialog.id), 'input', utterance, time()) await event.wait() - return self.get_workflow_record(str(dialog.id)) + return self.flush_record(str(dialog.id)) + else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) await self.process(str(dialog.id), 'input', utterance, time()) diff --git a/core/config_parser.py b/core/config_parser.py index 0b9557da..dde2c09c 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -29,7 +29,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags if conf_record['protocol'] == 'http': if batch_size == 1 and isinstance(url, str): - connector_func = HTTPConnector(sess, url, formatter, conf_record['name']).send + connector_func = HTTPConnector(sess, url, formatter, name).send else: queue = asyncio.Queue() connector_func = AioQueueConnector(queue).send # worker task and queue connector diff --git a/core/connectors.py b/core/connectors.py index 3eba52ba..fdc40438 100644 
--- a/core/connectors.py +++ b/core/connectors.py @@ -17,7 +17,7 @@ async def send(self, payload: Dict, callback: Callable): await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: self.formatter(response[0], mode='out')}, - respinse_time=time.time() + response_time=time.time() ) @@ -89,7 +89,7 @@ async def send(self, payload: Dict, callback: Callable): self.intermediate_storage[message_uuid] = response_text event.set() await callback(payload['dialog']['id'], self.service_name, - None, time.time()) + response_text, time.time()) class EventSetOutputConnector: @@ -102,4 +102,4 @@ async def send(self, payload: Dict, callback: Callable): raise ValueError("'event' key is not presented in payload") event.set() await callback(payload['dialog']['id'], self.service_name, - None, time.time()) + " ", time.time()) From 4471bfe555d8c3beb6a33657a715db2fbdd2cfa4 Mon Sep 17 00:00:00 2001 From: litinsky Date: Mon, 30 Sep 2019 18:16:14 +0300 Subject: [PATCH 039/133] Added transport files --- core/transport/__init__.py | 10 + core/transport/base.py | 80 +++++++ core/transport/gateways/__init__.py | 0 core/transport/gateways/rabbitmq.py | 344 ++++++++++++++++++++++++++++ core/transport/messages.py | 96 ++++++++ requirements.txt | 1 + 6 files changed, 531 insertions(+) create mode 100644 core/transport/__init__.py create mode 100644 core/transport/base.py create mode 100644 core/transport/gateways/__init__.py create mode 100644 core/transport/gateways/rabbitmq.py create mode 100644 core/transport/messages.py diff --git a/core/transport/__init__.py b/core/transport/__init__.py new file mode 100644 index 00000000..5058b6e1 --- /dev/null +++ b/core/transport/__init__.py @@ -0,0 +1,10 @@ +from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway + + +transport_map = { + 'rabbitmq': { + 'agent': RabbitMQAgentGateway, + 'service': RabbitMQServiceGateway, + 'channel': RabbitMQChannelGateway + } +} \ No newline at end of file diff --git a/core/transport/base.py b/core/transport/base.py new file mode 100644 index 00000000..c8292836 --- /dev/null +++ b/core/transport/base.py @@ -0,0 +1,80 @@ +from typing import List, Callable, TypeVar, Union, Dict, Any, Awaitable, Optional + + +TAgentGateway = TypeVar('TAgentGateway', bound='AgentGatewayBase') +TServiceCaller = TypeVar('TServiceCaller', bound='ServiceCallerBase') +TServiceGateway = TypeVar('TServiceGateway', bound='ServiceGatewayBase') +TChannelConnector = TypeVar('TChannelConnector', bound='ChannelConnectorBase') +TChannelGateway = TypeVar('TChannelGateway', bound='ChannelGatewayBase') + + +class AgentGatewayBase: + _on_service_callback: Callable[[Dict], Awaitable] + _on_channel_callback: Callable[[str, str, str, bool], Awaitable] + + def __init__(self, on_service_callback: Callable[[Dict], Awaitable], + on_channel_callback: Callable[[str, str, str, bool], Awaitable], + *args, **kwargs): + + super(AgentGatewayBase, self).__init__(*args, **kwargs) + self._on_service_callback = on_service_callback + self._on_channel_callback = on_channel_callback + + async def send_to_service(self, service: str, dialog_state: dict) -> None: + raise NotImplementedError + + async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> None: + raise NotImplementedError + + +class ServiceCallerBase: + _config: dict + _service_name: str + _formatter: Callable[[Union[List[Dict], Any], bool], Union[Any, List[Any]]] + + def __init__(self, + config: dict, + formatter: 
Callable[[Union[List[Dict], Any], bool], Union[Any, List[Any]]]) -> None: + + self._config = config + self._service_name = config['service']['name'] + self._formatter = formatter + + def infer(self, dialog_states_batch: List[dict]) -> Optional[List[dict]]: + raise NotImplementedError + + +class ServiceGatewayBase: + _service_caller: TServiceCaller + + def __init__(self, service_caller: ServiceCallerBase, *args, **kwargs) -> None: + super(ServiceGatewayBase, self).__init__(*args, **kwargs) + self._service_caller = service_caller + + def _infer(self, dialog_states_batch: List[dict]) -> List[dict]: + return self._service_caller.infer(dialog_states_batch) + + +class ChannelConnectorBase: + _config: dict + _channel_id: str + _on_channel_callback: Callable[[str, str, str, bool], Awaitable] + + def __init__(self, config: dict, on_channel_callback: Callable[[str, str, str, bool], Awaitable]) -> None: + self._config = config + self._channel_id = self._config['channel']['id'] + self._on_channel_callback = on_channel_callback + + async def send_to_channel(self, user_id: str, response: str) -> None: + raise NotImplementedError + + +class ChannelGatewayBase: + _to_channel_callback: Callable[[str, str], Awaitable] + + def __init__(self, to_channel_callback: Callable[[str, str], Awaitable], *args, **kwargs) -> None: + super(ChannelGatewayBase, self).__init__(*args, **kwargs) + self._to_channel_callback = to_channel_callback + + async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, reset_dialog: bool) -> None: + raise NotImplementedError diff --git a/core/transport/gateways/__init__.py b/core/transport/gateways/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py new file mode 100644 index 00000000..d1b70842 --- /dev/null +++ b/core/transport/gateways/rabbitmq.py @@ -0,0 +1,344 @@ +import asyncio +import json +import functools +import time +from uuid import uuid4 +from typing import Dict, List, Optional, Callable, Awaitable +from logging import getLogger + +import aio_pika +from aio_pika import Connection, Channel, Exchange, Queue, IncomingMessage, Message + +from core.transport.base import AgentGatewayBase, ServiceGatewayBase, ChannelGatewayBase +from core.transport.base import TServiceCaller +from core.transport.messages import ServiceTaskMessage, ServiceResponseMessage, ToChannelMessage, FromChannelMessage +from core.transport.messages import TMessageBase, get_transport_message + + +AGENT_IN_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_in' +AGENT_OUT_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_out' +AGENT_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_agent_{agent_name}' +AGENT_ROUTING_KEY_TEMPLATE = 'agent.{agent_name}' + +SERVICE_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_service_{service_name}' +SERVICE_ROUTING_KEY_TEMPLATE = 'service.{service_name}.any' +SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE = 'service.{service_name}.instance.{instance_id}' + +CHANNEL_QUEUE_NAME_TEMPLATE = '{agent_namespace}_{agent_name}_q_channel_{channel_id}' +CHANNEL_ROUTING_KEY_TEMPLATE = 'agent.{agent_name}.channel.{channel_id}.any' + +logger = getLogger(__name__) + + +# TODO: add proper RabbitMQ SSL authentication +# TODO: add load balancing for stateful skills +class RabbitMQTransportBase: + _config: dict + _loop: asyncio.AbstractEventLoop + _agent_in_exchange: Exchange + _agent_out_exchange: Exchange + _connection: Connection + _agent_in_channel: Channel + _agent_out_channel: Channel + _in_queue: Optional[Queue] 
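A quick illustration, not part of the patch, of how the naming templates declared above expand, using the agent values that appear later in HIGHLOAD_SETTINGS ('deeppavlov_agent' namespace, 'dp_agent' agent) and a hypothetical service named 'ner':

    AGENT_IN_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_in'
    AGENT_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_agent_{agent_name}'
    SERVICE_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_service_{service_name}'
    SERVICE_ROUTING_KEY_TEMPLATE = 'service.{service_name}.any'

    print(AGENT_IN_EXCHANGE_NAME_TEMPLATE.format(agent_namespace='deeppavlov_agent'))
    # -> deeppavlov_agent_e_in
    print(AGENT_QUEUE_NAME_TEMPLATE.format(agent_namespace='deeppavlov_agent', agent_name='dp_agent'))
    # -> deeppavlov_agent_q_agent_dp_agent
    print(SERVICE_QUEUE_NAME_TEMPLATE.format(agent_namespace='deeppavlov_agent', service_name='ner'))
    # -> deeppavlov_agent_q_service_ner
    print(SERVICE_ROUTING_KEY_TEMPLATE.format(service_name='ner'))
    # -> service.ner.any

Services consume from the queue bound to the agent out exchange with the 'service.<name>.any' key, so any running instance of the same service can pick up a task; each instance also binds an instance-specific key ('service.<name>.instance.<id>'), which the service gateway sets up further down.
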
+ _response_timeout_sec: int + + def __init__(self, config: dict, *args, **kwargs): + super(RabbitMQTransportBase, self).__init__(*args, **kwargs) + self._config = config + self._in_queue = None + self._response_timeout_sec = config['agent']['response_timeout_sec'] + + async def _connect(self) -> None: + agent_namespace = self._config['agent_namespace'] + + host = self._config['transport']['rabbitmq']['host'] + port = self._config['transport']['rabbitmq']['port'] + login = self._config['transport']['rabbitmq']['login'] + password = self._config['transport']['rabbitmq']['password'] + virtualhost = self._config['transport']['rabbitmq']['virtualhost'] + + logger.info('Starting RabbitMQ connection...') + + while True: + try: + self._connection = await aio_pika.connect_robust(loop=self._loop, host=host, port=port, login=login, + password=password, virtualhost=virtualhost) + + logger.info('RabbitMQ connected') + break + except ConnectionError: + reconnect_timeout = 5 + logger.error(f'RabbitMQ connection error, making another attempt in {reconnect_timeout} secs') + time.sleep(reconnect_timeout) + + self._agent_in_channel = await self._connection.channel() + agent_in_exchange_name = AGENT_IN_EXCHANGE_NAME_TEMPLATE.format(agent_namespace=agent_namespace) + self._agent_in_exchange = await self._agent_in_channel.declare_exchange(name=agent_in_exchange_name, + type=aio_pika.ExchangeType.TOPIC) + logger.info(f'Declared agent in exchange: {agent_in_exchange_name}') + + self._agent_out_channel = await self._connection.channel() + agent_out_exchange_name = AGENT_OUT_EXCHANGE_NAME_TEMPLATE.format(agent_namespace=agent_namespace) + self._agent_out_exchange = await self._agent_in_channel.declare_exchange(name=agent_out_exchange_name, + type=aio_pika.ExchangeType.TOPIC) + logger.info(f'Declared agent out exchange: {agent_out_exchange_name}') + + async def _setup_queues(self) -> None: + raise NotImplementedError + + async def _on_message_callback(self, message: IncomingMessage) -> None: + raise NotImplementedError + + +class RabbitMQAgentGateway(RabbitMQTransportBase, AgentGatewayBase): + _agent_name: str + _service_responded_events: Dict[str, asyncio.Event] + _service_responses: Dict[str, dict] + + def __init__(self, config: dict, + on_service_callback: Callable[[Dict], Awaitable], + on_channel_callback: Callable[[str, str, str, bool], Awaitable]) -> None: + + super(RabbitMQAgentGateway, self).__init__(config=config, + on_service_callback=on_service_callback, + on_channel_callback=on_channel_callback) + + self._loop = asyncio.get_event_loop() + self._agent_name = self._config['agent']['name'] + + self._loop.run_until_complete(self._connect()) + self._loop.run_until_complete(self._setup_queues()) + self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback)) + logger.info('Agent in queue started consuming') + + async def _setup_queues(self) -> None: + agent_namespace = self._config['agent_namespace'] + in_queue_name = AGENT_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace, agent_name=self._agent_name) + self._in_queue = await self._agent_in_channel.declare_queue(name=in_queue_name, durable=True) + logger.info(f'Declared agent in queue: {in_queue_name}') + + routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name) + await self._in_queue.bind(exchange=self._agent_in_exchange, routing_key=routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {routing_key}') + + async def _on_message_callback(self, message: IncomingMessage) -> None: + 
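The callback body below dispatches on the wrapper class that get_transport_message returns: service responses are forwarded to _on_service_callback, channel messages to _on_channel_callback. A minimal sketch of that dispatch, with an invented payload; the classes come from core/transport/messages.py, which is added further down in this patch:

    from core.transport.messages import get_transport_message, FromChannelMessage

    # Hypothetical incoming body; 'msg_type' selects the wrapper class.
    body = {
        'msg_type': 'from_channel_message',
        'agent_name': 'dp_agent',
        'channel_id': 'cmd_client',
        'user_id': 'user-1',
        'utterance': 'hello',
        'reset_dialog': False,
    }

    message_in = get_transport_message(body)
    assert isinstance(message_in, FromChannelMessage)
    assert message_in.utterance == 'hello'
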
message_in: TMessageBase = get_transport_message(json.loads(message.body, encoding='utf-8')) + await message.ack() + + if isinstance(message_in, ServiceResponseMessage): + logger.debug(f'Received service response message with task uuid {message_in.task_uuid}') + partial_dialog_state = message_in.partial_dialog_state + await self._loop.create_task(self._on_service_callback(partial_dialog_state=partial_dialog_state)) + + elif isinstance(message_in, FromChannelMessage): + utterance = message_in.utterance + channel_id = message_in.channel_id + user_id = message_in.user_id + reset_dialog = message_in.reset_dialog + logger.debug(f'Received message from channel {channel_id}, user {user_id}') + await self._loop.create_task(self._on_channel_callback(utterance=utterance, + channel_id=channel_id, + user_id=user_id, + reset_dialog=reset_dialog)) + + async def send_to_service(self, service_name: str, dialog_state: dict) -> None: + task_uuid = str(uuid4()) + task = ServiceTaskMessage(agent_name=self._agent_name, task_uuid=task_uuid, dialog_state=dialog_state) + logger.debug(f'Created task {task_uuid} to service {service_name} with dialog state: {str(dialog_state)}') + + message = Message(body=json.dumps(task.to_json()).encode('utf-8'), + delivery_mode=aio_pika.DeliveryMode.PERSISTENT, + expiration=self._response_timeout_sec) + + routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=service_name) + await self._agent_out_exchange.publish(message=message, routing_key=routing_key) + logger.debug(f'Published task {task_uuid} with routing key {routing_key}') + + async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> None: + channel_message = ToChannelMessage(agent_name=self._agent_name, + channel_id=channel_id, + user_id=user_id, + response=response) + + channel_message_json = channel_message.to_json() + message = Message(body=json.dumps(channel_message_json).encode('utf-8'), + delivery_mode=aio_pika.DeliveryMode.PERSISTENT, + expiration=self._response_timeout_sec) + + routing_key = CHANNEL_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name, channel_id=channel_id) + await self._agent_out_exchange.publish(message=message, routing_key=routing_key) + logger.debug(f'Published channel message: {str(channel_message_json)}') + + +class RabbitMQServiceGateway(RabbitMQTransportBase, ServiceGatewayBase): + _service_caller: TServiceCaller + _service_name: str + _instance_id: str + _batch_size: int + _incoming_messages_buffer: List[IncomingMessage] + _add_to_buffer_lock: asyncio.Lock + _infer_lock: asyncio.Lock + + def __init__(self, config: dict, service_caller: TServiceCaller) -> None: + super(RabbitMQServiceGateway, self).__init__(config=config, service_caller=service_caller) + self._loop = asyncio.get_event_loop() + self._service_name = self._config['service']['name'] + self._instance_id = self._config['service']['instance_id'] or f'{self._service_name}{str(uuid4())}' + self._batch_size = self._config['service']['batch_size'] + + self._incoming_messages_buffer = [] + self._add_to_buffer_lock = asyncio.Lock() + self._infer_lock = asyncio.Lock() + + self._loop.run_until_complete(self._connect()) + self._loop.run_until_complete(self._setup_queues()) + self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback)) + logger.info(f'Service in queue started consuming') + + async def _setup_queues(self) -> None: + agent_namespace = self._config['agent_namespace'] + + in_queue_name = SERVICE_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace, + 
service_name=self._service_name) + + self._in_queue = await self._agent_out_channel.declare_queue(name=in_queue_name, durable=True) + logger.info(f'Declared service in queue: {in_queue_name}') + + any_instance_routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=self._service_name) + await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=any_instance_routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {any_instance_routing_key}') + + this_instance_routing_key = SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE.format(service_name=self._service_name, + instance_id=self._instance_id) + + await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=this_instance_routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {this_instance_routing_key}') + + await self._agent_out_channel.set_qos(prefetch_count=self._batch_size * 2) + + async def _on_message_callback(self, message: IncomingMessage) -> None: + await self._add_to_buffer_lock.acquire() + self._incoming_messages_buffer.append(message) + logger.debug('Incoming message received') + + if len(self._incoming_messages_buffer) < self._batch_size: + self._add_to_buffer_lock.release() + + await self._infer_lock.acquire() + try: + messages_batch = self._incoming_messages_buffer + + if messages_batch: + self._incoming_messages_buffer = [] + + if self._add_to_buffer_lock.locked(): + self._add_to_buffer_lock.release() + + tasks_batch = [ServiceTaskMessage.from_json(json.loads(message.body, encoding='utf-8')) + for message in messages_batch] + + processed_ok = await self._process_tasks(tasks_batch) + + if processed_ok: + for message in messages_batch: + await message.ack() + else: + for message in messages_batch: + await message.reject() + + elif self._add_to_buffer_lock.locked(): + self._add_to_buffer_lock.release() + finally: + self._infer_lock.release() + + async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: + task_agent_names_batch, task_uuids_batch, dialog_states_batch = \ + zip(*[(task.agent_name, task.task_uuid, task.dialog_state) for task in tasks_batch]) + + logger.debug(f'Prepared for infering tasks {str(task_uuids_batch)}') + + # TODO: Think about proper infer errors and aknowledge handling + try: + inferer = functools.partial(self._infer, dialog_states_batch) + infer_timeout = self._config['service']['infer_timeout_sec'] + responses_batch = await asyncio.wait_for(self._loop.run_in_executor(executor=None, func=inferer), + infer_timeout) + logger.debug(f'Processed tasks {str(task_uuids_batch)}') + except asyncio.TimeoutError: + responses_batch = None + + if responses_batch: + await asyncio.wait([self._send_results(task_agent_names_batch[i], task_uuids_batch[i], partial_state) + for i, partial_state in enumerate(responses_batch)]) + return True + else: + return False + + async def _send_results(self, agent_name: str, task_uuid: str, partial_dialog_state: dict) -> None: + result = ServiceResponseMessage(agent_name=agent_name, + task_uuid=task_uuid, + service_name=self._service_name, + service_instance_id=self._instance_id, + partial_dialog_state=partial_dialog_state) + + message = Message(body=json.dumps(result.to_json()).encode('utf-8'), + delivery_mode=aio_pika.DeliveryMode.PERSISTENT, + expiration=self._response_timeout_sec) + + routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=agent_name) + await self._agent_in_exchange.publish(message=message, routing_key=routing_key) + logger.debug(f'Sent response for task {str(task_uuid)} with routing 
key {routing_key}') + + +class RabbitMQChannelGateway(RabbitMQTransportBase, ChannelGatewayBase): + _agent_name: str + _channel_id: str + + def __init__(self, config: dict, to_channel_callback: Callable[[str, str], Awaitable]) -> None: + super(RabbitMQChannelGateway, self).__init__(config=config, to_channel_callback=to_channel_callback) + self._loop = asyncio.get_event_loop() + self._agent_name = self._config['agent']['name'] + self._channel_id = self._config['channel']['id'] + + self._loop.run_until_complete(self._connect()) + self._loop.run_until_complete(self._setup_queues()) + self._loop.run_until_complete(self._in_queue.consume(callback=self._on_message_callback)) + logger.info(f'Channel connector messages queue from agent started consuming') + + async def _setup_queues(self) -> None: + agent_namespace = self._config['agent_namespace'] + + in_queue_name = CHANNEL_QUEUE_NAME_TEMPLATE.format(agent_namespace=agent_namespace, + agent_name=self._agent_name, + channel_id=self._channel_id) + + self._in_queue = await self._agent_out_channel.declare_queue(name=in_queue_name, durable=True) + logger.info(f'Declared channel in queue: {in_queue_name}') + + routing_key = CHANNEL_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name, channel_id=self._channel_id) + await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {routing_key}') + + async def _on_message_callback(self, message: IncomingMessage) -> None: + message_json = json.loads(message.body, encoding='utf-8') + message_to_channel: ToChannelMessage = ToChannelMessage.from_json(message_json) + await self._loop.create_task(self._to_channel_callback(message_to_channel.user_id, message_to_channel.response)) + await message.ack() + logger.debug(f'Processed message to channel: {str(message_json)}') + + async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, reset_dialog: bool) -> None: + message_from_channel = FromChannelMessage(agent_name=self._agent_name, + channel_id=channel_id, + user_id=user_id, + utterance=utterance, + reset_dialog=reset_dialog) + + message_json = message_from_channel.to_json() + message = Message(body=json.dumps(message_json).encode('utf-8'), + delivery_mode=aio_pika.DeliveryMode.PERSISTENT, + expiration=self._response_timeout_sec) + + routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name) + await self._agent_in_exchange.publish(message=message, routing_key=routing_key) + logger.debug(f'Processed message to agent: {str(message_json)}') diff --git a/core/transport/messages.py b/core/transport/messages.py new file mode 100644 index 00000000..20123443 --- /dev/null +++ b/core/transport/messages.py @@ -0,0 +1,96 @@ +from typing import TypeVar + + +class MessageBase: + @classmethod + def from_json(cls, message_json): + message_json.pop('msg_type') + return cls(**message_json) + + def to_json(self) -> dict: + return self.__dict__ + + +TMessageBase = TypeVar('TMessageBase', bound=MessageBase) + + +class ServiceTaskMessage(MessageBase): + msg_type = 'service_task' + agent_name: str + task_uuid: str + dialog_state: dict + + def __init__(self, agent_name: str, task_uuid: str, dialog_state: dict) -> None: + self.msg_type = self.__class__.msg_type + self.agent_name = agent_name + self.task_uuid = task_uuid + self.dialog_state = dialog_state + + +class ServiceResponseMessage(MessageBase): + msg_type = 'service_response' + agent_name: str + task_uuid: str + service_name: str + service_instance_id: str + 
partial_dialog_state: dict + + def __init__(self, agent_name: str, task_uuid: str, service_name: str, service_instance_id: str, + partial_dialog_state: dict) -> None: + self.msg_type = self.__class__.msg_type + self.agent_name = agent_name + self.task_uuid = task_uuid + self.service_name = service_name + self.service_instance_id = service_instance_id + self.partial_dialog_state = partial_dialog_state + + +class ToChannelMessage(MessageBase): + msg_type = 'to_channel_message' + agent_name: str + channel_id: str + user_id: str + response: str + + def __init__(self, agent_name: str, channel_id: str, user_id: str, response: str) -> None: + self.msg_type = self.__class__.msg_type + self.agent_name = agent_name + self.channel_id = channel_id + self.user_id = user_id + self.response = response + + +class FromChannelMessage(MessageBase): + msg_type = 'from_channel_message' + agent_name: str + channel_id: str + user_id: str + utterance: str + reset_dialog: bool + + def __init__(self, agent_name: str, channel_id: str, user_id: str, utterance: str, reset_dialog: bool) -> None: + self.msg_type = self.__class__.msg_type + self.agent_name = agent_name + self.channel_id = channel_id + self.user_id = user_id + self.utterance = utterance + self.reset_dialog = reset_dialog + + +_message_wrappers_map = { + 'service_task': ServiceTaskMessage, + 'service_response': ServiceResponseMessage, + 'to_channel_message': ToChannelMessage, + 'from_channel_message': FromChannelMessage +} + + +def get_transport_message(message_json: dict) -> TMessageBase: + message_type = message_json['msg_type'] + + if message_type not in _message_wrappers_map: + raise ValueError(f'Unknown transport message type: {message_type}') + + message_wrapper_class: TMessageBase = _message_wrappers_map[message_type] + + return message_wrapper_class.from_json(message_json) diff --git a/requirements.txt b/requirements.txt index 6d470c25..d9254237 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ mongoengine==0.17.0 aiohttp==3.5.4 aiohttp-swagger==1.0.9 pyyaml==5.1 +aio-pika==5.6.0 \ No newline at end of file From 13a2695b3f429ec87042ae6555b2516c1eaff96f Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 30 Sep 2019 18:27:32 +0300 Subject: [PATCH 040/133] style: improve code style and docstring --- core/pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/pipeline.py b/core/pipeline.py index b3d662ac..0ff7e2bd 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -61,10 +61,10 @@ def process_service_names(self): self.services[name_prev_service].next_services.add(service) return wrong_names # wrong names means that some service_names, used in previous services don't exist - def get_next_services(self, done=None, waiting=None): - if not done: + def get_next_services(self, done: set = None, waiting: set =None): + if done is not None: done = set() - if not waiting: + if waiting is not None: waiting = set() removed_names = waiting | done for name, service in self.services.items(): From 7a3f65532816608103870612874d0ee9e1bdf3b6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 30 Sep 2019 18:46:27 +0300 Subject: [PATCH 041/133] fix: condition --- core/pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/pipeline.py b/core/pipeline.py index 0ff7e2bd..39385540 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -61,10 +61,10 @@ def process_service_names(self): self.services[name_prev_service].next_services.add(service) return wrong_names # wrong 
names means that some service_names, used in previous services don't exist - def get_next_services(self, done: set = None, waiting: set =None): - if done is not None: + def get_next_services(self, done: set = None, waiting: set = None): + if done is None: done = set() - if waiting is not None: + if waiting is None: waiting = set() removed_names = waiting | done for name, service in self.services.items(): @@ -76,7 +76,7 @@ def get_next_services(self, done: set = None, waiting: set =None): def get_endpoint_services(self): return [s for s in self.services.values() if not s.next_services and 'responder' not in s.tags] - def add_responder_service(self, service): + def add_responder_service(self, segit rvice): if not service.is_responder(): raise ValueError('service should be a responder') endpoints = self.get_endpoint_services() From 7da52dad97567dcf9d645014f0be6eaea2e5f48b Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 27 Sep 2019 21:41:15 +0300 Subject: [PATCH 042/133] fix: schema copy --- core/state_manager.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/state_manager.py b/core/state_manager.py index db712858..7efc691f 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Hashable, Any, Optional, Dict, TypeVar, List +from copy import deepcopy from mongoengine import connect @@ -182,7 +183,7 @@ def update_bot(bot: Bot, active_skill: Dict): @classmethod def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: - utterance = HUMAN_UTTERANCE_SCHEMA + utterance = deepcopy(HUMAN_UTTERANCE_SCHEMA) utterance['text'] = payload utterance['date_time'] = str(datetime.now()) utterance['user'] = dialog['human'] @@ -221,7 +222,7 @@ def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payl cls.update_human_dict(dialog['human'], active_skill) cls.update_bot_dict(dialog['bot'], active_skill) - utterance = BOT_UTTERANCE_SCHEMA + utterance = deepcopy(BOT_UTTERANCE_SCHEMA) utterance['text'] = new_text utterance['orig_text'] = active_skill['text'] utterance['date_time'] = str(datetime.now()) From 8b03bb26485c4ef92a6baeee5722a71dabdc33ec Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 27 Sep 2019 14:34:21 +0300 Subject: [PATCH 043/133] feat: update mongo image, take db vars from environment --- config.py | 10 +++++----- docker-compose.yml | 2 +- generate_composefile.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index 88d890c5..3f29d390 100644 --- a/config.py +++ b/config.py @@ -1,11 +1,11 @@ -from functools import partial +from os import getenv from state_formatters.dp_formatters import * -DB_NAME = 'test' -DB_HOST = '127.0.0.1' -DB_PORT = 27017 -DB_PATH = '/data/db' +DB_NAME = getenv('DB_NAME', 'test') +DB_HOST = getenv('DB_HOST', '127.0.0.1') +DB_PORT = getenv('DB_PORT', 27017) +DB_PATH = getenv('DB_PATH', '/data/db') MAX_WORKERS = 4 diff --git a/docker-compose.yml b/docker-compose.yml index ab261150..e4f33a67 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,7 +57,7 @@ services: - ${EXTERNAL_FOLDER}/.deeppavlov:/root/.deeppavlov mongo: command: mongod - image: mongo:3.2.0 + image: mongo:4.0.0 ports: - 27017:27017 volumes: diff --git a/generate_composefile.py b/generate_composefile.py index 0f503aa1..011014de 100644 --- a/generate_composefile.py +++ b/generate_composefile.py @@ -24,7 +24,7 @@ MONGO_BASIC = { 'mongo': {'command': 'mongod', - 'image': 
'mongo:3.2.0', + 'image': 'mongo:4.0.0', 'ports': ['{}:27017'], # map port to none standard port, to avoid conflicts with locally installed mongodb. 'volumes': ['/var/run/docker.sock:/var/run/docker.sock', f'{DB_PATH}:/root/data/db']} From 4807dd9ebe5eeeaedc4cae499db204b30c5ae0ca Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 30 Sep 2019 18:49:30 +0300 Subject: [PATCH 044/133] fix: condition --- core/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/pipeline.py b/core/pipeline.py index 0ff7e2bd..44015850 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -62,9 +62,9 @@ def process_service_names(self): return wrong_names # wrong names means that some service_names, used in previous services don't exist def get_next_services(self, done: set = None, waiting: set =None): - if done is not None: + if done is None: done = set() - if waiting is not None: + if waiting is None: waiting = set() removed_names = waiting | done for name, service in self.services.items(): From ccb3dcd702b3aaa8ccc142446fa4a88d9a0d6c99 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 30 Sep 2019 19:21:14 +0300 Subject: [PATCH 045/133] fix: exclude formatters time from logging --- core/connectors.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index fdc40438..ccd5271b 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -14,10 +14,11 @@ def __init__(self, session: aiohttp.ClientSession, url: str, formatter: Callable async def send(self, payload: Dict, callback: Callable): async with self.session.post(self.url, json=self.formatter([payload])) as resp: response = await resp.json() + response_time = time.time() await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: self.formatter(response[0], mode='out')}, - response_time=time.time() + response_time=response_time ) @@ -65,6 +66,7 @@ def __init__(self, service_name: str): async def send(self, payload: Dict, callback: Callable): response = payload['utterances'][-1]['selected_skills'] best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] + response_time = time.time() await callback( dialog_id=payload['id'], service_name=self.service_name, response={ @@ -74,7 +76,7 @@ async def send(self, payload: Dict, callback: Callable): 'confidence': best_skill[1]['confidence'] } }, - response_time=time.time()) + response_time=response_time) class HttpOutputConnector: @@ -88,8 +90,9 @@ async def send(self, payload: Dict, callback: Callable): response_text = payload['dialog']['utterances'][-1]['text'] self.intermediate_storage[message_uuid] = response_text event.set() + response_time = time.time() await callback(payload['dialog']['id'], self.service_name, - response_text, time.time()) + response_text, response_time) class EventSetOutputConnector: @@ -101,5 +104,6 @@ async def send(self, payload: Dict, callback: Callable): if not event or not isinstance(event, asyncio.Event): raise ValueError("'event' key is not presented in payload") event.set() + response_time = time.time() await callback(payload['dialog']['id'], self.service_name, - " ", time.time()) + " ", response_time) From be0561526c9832cec9343929ba80b63b943b2e14 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 30 Sep 2019 19:33:20 +0300 Subject: [PATCH 046/133] fix: location type annotation --- core/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/agent.py b/core/agent.py index 
9b36041f..d4f8cb5a 100644 --- a/core/agent.py +++ b/core/agent.py @@ -104,7 +104,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res return next_services async def register_msg(self, utterance: str, user_telegram_id: Hashable, - user_device_type: Any, location=Any, + user_device_type: Any, location: Any, channel_type=str, deadline_timestamp=None, require_response=False, **kwargs): user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) From fcdc51c7a4f6ef037ffe804e939b9da43bc9fc06 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Mon, 30 Sep 2019 22:28:13 +0300 Subject: [PATCH 047/133] logging variant, services and agent timings --- core/agent.py | 21 ++++++++++------ core/connectors.py | 37 +++++++++++++++++----------- tests/dummy_connectors_test_setup.py | 11 ++++++--- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/core/agent.py b/core/agent.py index d4f8cb5a..108cf15e 100644 --- a/core/agent.py +++ b/core/agent.py @@ -63,19 +63,24 @@ def get_services_status(self, dialog_id: str): return done, waiting def process_service_response(self, dialog_id: str, service_name: str = None, response: Any = None, - response_time: float = None): + **kwargs): workflow_record = self.get_workflow_record(dialog_id) # Updating workflow with service response service = self.pipeline.get_service_by_name(service_name) if service: - service_data = self.workflow[dialog_id]['services'][service_name] + service_data = workflow_record['services'][service_name] service_data['done'] = True - service_data['done_time'] = response_time + service_data['done_time'] = time() if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], payload=response) + + # passing kwargs to services record + if not set(service_data.keys()).intersection(set(kwargs.keys())): + service_data.update(kwargs) + # Flush record and return zero next services if service is is_responder if service.is_responder(): if not workflow_record.get('hold_flush'): @@ -115,17 +120,17 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, kwargs['event'] = event self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=True, **kwargs) self.register_service_request(str(dialog.id), 'input') - await self.process(str(dialog.id), 'input', utterance, time()) + await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance) await event.wait() return self.flush_record(str(dialog.id)) - else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) - await self.process(str(dialog.id), 'input', utterance, time()) + self.register_service_request(str(dialog.id), 'input') + await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance) - async def process(self, dialog_id, service_name=None, response: Any = None, response_time: float = None): + async def process(self, dialog_id, service_name=None, response: Any = None, **kwargs): workflow_record = self.get_workflow_record(dialog_id) - next_services = self.process_service_response(dialog_id, service_name, response, response_time) + next_services = self.process_service_response(dialog_id, service_name, response, **kwargs) service_requests = [] for service in next_services: diff --git a/core/connectors.py b/core/connectors.py index ccd5271b..e1f42343 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -12,13 +12,16 @@ def 
__init__(self, session: aiohttp.ClientSession, url: str, formatter: Callable self.service_name = service_name async def send(self, payload: Dict, callback: Callable): - async with self.session.post(self.url, json=self.formatter([payload])) as resp: + formatted_payload = self.formatter([payload]) + service_send_time = time.time() + async with self.session.post(self.url, json=formatted_payload) as resp: response = await resp.json() - response_time = time.time() + service_response_time = time.time() await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: self.formatter(response[0], mode='out')}, - response_time=response_time + service_send_time=service_send_time, + service_response_time=service_response_time ) @@ -48,13 +51,18 @@ async def call_service(self, process_callable): batch.append(item) if batch: tasks = [] - async with self.session.post(self.url, json=self.formatter(batch)) as resp: + formatted_payload = self.formatter(batch) + service_send_time = time.time() + async with self.session.post(self.url, json=formatted_payload) as resp: response = await resp.json() - response_time = time.time() + service_response_time = time.time() for dialog, response_text in zip(batch, response): - tasks.append(process_callable(dialog['id'], self.service_name, - {self.service_name: self.formatter(response_text, mode='out')}, - response_time)) + tasks.append( + process_callable( + dialog_id=dialog['id'], service_name=self.service_name, + response={self.service_name: self.formatter(response_text, mode='out')}, + service_send_time=service_send_time, + service_response_time=service_response_time)) await asyncio.gather(*tasks) await asyncio.sleep(0.1) @@ -90,9 +98,10 @@ async def send(self, payload: Dict, callback: Callable): response_text = payload['dialog']['utterances'][-1]['text'] self.intermediate_storage[message_uuid] = response_text event.set() - response_time = time.time() - await callback(payload['dialog']['id'], self.service_name, - response_text, response_time) + await callback(dialog_id=payload['dialog']['id'], + service_name=self.service_name, + response=response_text, + service_response_time=time.time()) class EventSetOutputConnector: @@ -104,6 +113,6 @@ async def send(self, payload: Dict, callback: Callable): if not event or not isinstance(event, asyncio.Event): raise ValueError("'event' key is not presented in payload") event.set() - response_time = time.time() - await callback(payload['dialog']['id'], self.service_name, - " ", response_time) + await callback(dialog_id=payload['dialog']['id'], + service_name=self.service_name, + response=" ", service_response_time=time.time()) diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index 53516e4f..e156c80f 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -15,6 +15,7 @@ parser = argparse.ArgumentParser() parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) +parser.add_argument('-rl', '--response-logger', help='run agent with services response logging', action='store_true') args = parser.parse_args() CHANNEL = 'vk' @@ -26,12 +27,14 @@ def __init__(self, returns, sleeptime, service_name): self.service_name = service_name async def send(self, payload, callback): + service_send_time = time.time() await asyncio.sleep(self.sleeptime) await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: {"text": choice(self.returns), "confidence": 0.5}}, 
- response_time=time.time()) + service_send_time=service_send_time, + service_response_time=time.time()) class DummySelectorConnector: @@ -41,12 +44,14 @@ def __init__(self, returns, sleeptime, service_name): self.service_name = service_name async def send(self, payload, callback): + service_send_time = time.time() await asyncio.sleep(self.sleeptime) await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: self.returns}, - response_time=time.time()) + service_send_time=service_send_time, + service_response_time=time.time()) async def on_shutdown(app): @@ -119,7 +124,7 @@ def main(): endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage, 'http_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) - register_msg, process_callable = prepare_agent(services, endpoint, input_srv, False) + register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown) From 8d9100917666d5174f535939364d9f2c25c605ca Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Tue, 1 Oct 2019 14:06:56 +0300 Subject: [PATCH 048/133] improved connectors --- core/connectors.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index e1f42343..5189f24b 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -72,6 +72,7 @@ def __init__(self, service_name: str): self.service_name = service_name async def send(self, payload: Dict, callback: Callable): + service_send_time = time.time() response = payload['utterances'][-1]['selected_skills'] best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0] response_time = time.time() @@ -84,7 +85,8 @@ async def send(self, payload: Dict, callback: Callable): 'confidence': best_skill[1]['confidence'] } }, - response_time=response_time) + service_send_time=service_send_time, + service_response_time=response_time) class HttpOutputConnector: @@ -96,11 +98,13 @@ async def send(self, payload: Dict, callback: Callable): message_uuid = payload['message_uuid'] event = payload['event'] response_text = payload['dialog']['utterances'][-1]['text'] + service_send_time = time.time() self.intermediate_storage[message_uuid] = response_text event.set() await callback(dialog_id=payload['dialog']['id'], service_name=self.service_name, response=response_text, + service_send_time=service_send_time, service_response_time=time.time()) @@ -110,9 +114,12 @@ def __init__(self, service_name: str): async def send(self, payload: Dict, callback: Callable): event = payload.get('event', None) + service_send_time = time.time() if not event or not isinstance(event, asyncio.Event): raise ValueError("'event' key is not presented in payload") event.set() await callback(dialog_id=payload['dialog']['id'], service_name=self.service_name, - response=" ", service_response_time=time.time()) + response=" ", + service_send_time=service_send_time, + service_response_time=time.time()) From 65127d929351cdb14bbfb06894fde54b0270404d Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 1 Oct 2019 14:46:53 +0300 Subject: [PATCH 049/133] fix: fix dp version in the docker file, hotfixes --- core/agent.py | 3 ++- dp/dockerfile_skill_cpu | 2 +- dp/dockerfile_skill_gpu | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) 
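The agent.py hunk below, besides tightening the register_msg annotations, adds the register_service_request('input') call that the non-blocking branch was missing, so the 'input' service is marked as pending before process() runs. A condensed outline of that order, illustrative only; the names follow the real AsyncAgent methods:

    from time import time

    async def register_msg_outline(agent, utterance, dialog, deadline_timestamp=None):
        # mirrors the non-blocking (require_response=False) branch below
        agent.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp)  # 1. track the dialog
        agent.register_service_request(str(dialog.id), 'input')                          # 2. mark 'input' as sent
        await agent.process(str(dialog.id), 'input', utterance, time())                  # 3. kick off the pipeline
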
diff --git a/core/agent.py b/core/agent.py index d4f8cb5a..809ebe6a 100644 --- a/core/agent.py +++ b/core/agent.py @@ -105,7 +105,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res async def register_msg(self, utterance: str, user_telegram_id: Hashable, user_device_type: Any, location: Any, - channel_type=str, deadline_timestamp=None, + channel_type: str, deadline_timestamp=None, require_response=False, **kwargs): user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False @@ -121,6 +121,7 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) + self.register_service_request(str(dialog.id), 'input') await self.process(str(dialog.id), 'input', utterance, time()) async def process(self, dialog_id, service_name=None, response: Any = None, response_time: float = None): diff --git a/dp/dockerfile_skill_cpu b/dp/dockerfile_skill_cpu index 392e6aa9..4188b57f 100644 --- a/dp/dockerfile_skill_cpu +++ b/dp/dockerfile_skill_cpu @@ -1,4 +1,4 @@ -FROM deeppavlov/base-cpu:latest +FROM deeppavlov/base-cpu:0.6.1 ARG skillconfig ARG skillport diff --git a/dp/dockerfile_skill_gpu b/dp/dockerfile_skill_gpu index c88ac1a3..8b95c2ea 100644 --- a/dp/dockerfile_skill_gpu +++ b/dp/dockerfile_skill_gpu @@ -1,4 +1,4 @@ -FROM deeppavlov/base-gpu:latest +FROM deeppavlov/base-gpu:0.6.1 ARG skillconfig ARG skillport From 94974e8faa05f6646c37966b5452a3e4cda4f163 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 12:34:24 +0300 Subject: [PATCH 050/133] Integrated highload enteties to agent without enabling highload --- config.py | 19 ++++++++++ core/config_parser.py | 86 +++++++++++++++++++++++++++++-------------- core/run.py | 24 +++++++++--- 3 files changed, 96 insertions(+), 33 deletions(-) diff --git a/config.py b/config.py index 3f29d390..f2a6ed10 100644 --- a/config.py +++ b/config.py @@ -7,6 +7,25 @@ DB_PORT = getenv('DB_PORT', 27017) DB_PATH = getenv('DB_PATH', '/data/db') +HIGHLOAD_SETTINGS = { + 'agent_namespace': 'deeppavlov_agent', + 'agent': { + 'agent_name': 'dp_agent', + 'response_timeout': 120 + }, + 'channels': {}, + 'transport': { + 'type': 'rabbitmq', + 'rabbitmq': { + 'host': '127.0.0.1', + 'port': 5672, + 'login': 'guest', + 'password': 'guest', + 'virtualhost': '/' + } + } +} + MAX_WORKERS = 4 AGENT_ENV_FILE = "agent.env" diff --git a/core/config_parser.py b/core/config_parser.py index 938d4835..6195e416 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -1,21 +1,25 @@ +from functools import partial + import aiohttp import asyncio -from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS,\ - RESPONSE_SELECTORS, POSTPROCESSORS +from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, \ + RESPONSE_SELECTORS, POSTPROCESSORS, HIGHLOAD_SETTINGS from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \ QueueListenerBatchifyer from core.pipeline import Service, simple_workflow_formatter from core.state_manager import StateManager +from core.transport import transport_map -def parse_old_config(): +def parse_old_config(on_channel_callback, on_service_callback): services = [] worker_tasks = [] - session = aiohttp.ClientSession() + session = None + gateway = None def make_service_from_config_rec(conf_record, sess, 
state_processor_method, tags, names_previous_services, - name_modifier=None): + gate, name_modifier=None): _worker_tasks = [] if name_modifier: name = name_modifier(conf_record['name']) @@ -28,6 +32,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags connector_func = None if conf_record['protocol'] == 'http': + sess = sess or aiohttp.ClientSession() if batch_size == 1 and isinstance(url, str): connector_func = HTTPConnector(sess, url, formatter, conf_record['name']).send else: @@ -40,20 +45,31 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags for u in urls: _worker_tasks.append(QueueListenerBatchifyer(sess, u, formatter, name, queue, batch_size)) + elif conf_record['protocol'] == 'highload': + if not gate: + transport_type = HIGHLOAD_SETTINGS['transport']['type'] + gateway_cls = transport_map[transport_type]['agent'] + gate = gateway_cls(config=HIGHLOAD_SETTINGS, + on_service_callback=on_service_callback, + on_channel_callback=on_channel_callback) + + connector_func = partial(gate.send_to_service, service=name) + if connector_func is None: raise ValueError(f'No connector function is defined while making a service {name}.') _service = Service(name, connector_func, state_processor_method, batch_size, tags, names_previous_services, simple_workflow_formatter) - return _service, _worker_tasks + return _service, _worker_tasks, sess, gate def add_bot_to_name(name): return f'bot_{name}' for anno in ANNOTATORS_1: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['ANNOTATORS_1'], set()) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['ANNOTATORS_1'], set(), gateway) services.append(service) worker_tasks.extend(workers) @@ -61,8 +77,10 @@ def add_bot_to_name(name): if ANNOTATORS_2: for anno in ANNOTATORS_2: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['ANNOTATORS_2'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['ANNOTATORS_2'], previous_services, + gateway) services.append(service) worker_tasks.extend(workers) @@ -70,8 +88,10 @@ def add_bot_to_name(name): if ANNOTATORS_3: for anno in ANNOTATORS_3: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['ANNOTATORS_3'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['ANNOTATORS_3'], previous_services, + gateway) services.append(service) worker_tasks.extend(workers) @@ -79,8 +99,9 @@ def add_bot_to_name(name): if SKILL_SELECTORS: for ss in SKILL_SELECTORS: - service, workers = make_service_from_config_rec(ss, session, StateManager.do_nothing, - ['SKILL_SELECTORS', 'selector'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(ss, session, StateManager.do_nothing, + ['SKILL_SELECTORS', 'selector'], + previous_services, gateway) services.append(service) worker_tasks.extend(workers) @@ -88,8 +109,9 @@ def add_bot_to_name(name): if SKILLS: for s in SKILLS: - service, workers = make_service_from_config_rec(s, session, StateManager.add_selected_skill_dict, - ['SKILLS'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(s, session, + StateManager.add_selected_skill_dict, + ['SKILLS'], previous_services, 
gateway) services.append(service) worker_tasks.extend(workers) @@ -101,8 +123,10 @@ def add_bot_to_name(name): 1, ['RESPONSE_SELECTORS'], previous_services, simple_workflow_formatter)) else: for r in RESPONSE_SELECTORS: - service, workers = make_service_from_config_rec(r, session, StateManager.add_bot_utterance_simple_dict, - ['RESPONSE_SELECTORS'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(r, session, + StateManager.add_bot_utterance_simple_dict, + ['RESPONSE_SELECTORS'], previous_services, + gateway) services.append(service) worker_tasks.extend(workers) @@ -110,8 +134,9 @@ def add_bot_to_name(name): if POSTPROCESSORS: for p in POSTPROCESSORS: - service, workers = make_service_from_config_rec(p, session, StateManager.add_text_dict, - ['POSTPROCESSORS'], previous_services) + service, workers, session, gateway = make_service_from_config_rec(p, session, StateManager.add_text_dict, + ['POSTPROCESSORS'], previous_services, + gateway) services.append(service) worker_tasks.extend(workers) @@ -119,8 +144,10 @@ def add_bot_to_name(name): if ANNOTATORS_1: for anno in ANNOTATORS_1: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['POST_ANNOTATORS_1'], previous_services, add_bot_to_name) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['POST_ANNOTATORS_1'], previous_services, + gateway, add_bot_to_name) services.append(service) worker_tasks.extend(workers) @@ -128,18 +155,21 @@ def add_bot_to_name(name): if ANNOTATORS_2: for anno in ANNOTATORS_2: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['POST_ANNOTATORS_2'], previous_services, add_bot_to_name) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['POST_ANNOTATORS_2'], previous_services, + gateway, add_bot_to_name) services.append(service) worker_tasks.extend(workers) previous_services = {i.name for i in services if 'POST_ANNOTATORS_2' in i.tags} for anno in ANNOTATORS_3: - service, workers = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, - ['POST_ANNOTATORS_3'], - previous_services, add_bot_to_name) + service, workers, session, gateway = make_service_from_config_rec(anno, session, + StateManager.add_annotation_dict, + ['POST_ANNOTATORS_3'], + previous_services, gateway, add_bot_to_name) services.append(service) worker_tasks.extend(workers) - return services, worker_tasks, session + return services, worker_tasks, session, gateway diff --git a/core/run.py b/core/run.py index d4e2fa33..5976c76c 100644 --- a/core/run.py +++ b/core/run.py @@ -2,6 +2,7 @@ import argparse import uuid import logging +from typing import Any, Hashable from aiohttp import web from datetime import datetime @@ -19,7 +20,6 @@ fh.setLevel(logging.INFO) logger.addHandler(fh) - parser = argparse.ArgumentParser() parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, choices=['cmd_client', 'http_client'], default='cmd_client') @@ -146,7 +146,20 @@ async def dialog(request): def main(): - services, workers, session = parse_old_config() + async def register_msg(utterance: str, user_telegram_id: Hashable, + user_device_type: Any, location=Any, + channel_type=str, deadline_timestamp=None, + require_response=False, **kwargs): + + return await _register_msg(utterance, user_telegram_id, + user_device_type, location, 
+ channel_type, deadline_timestamp, + require_response, **kwargs) + + async def process(dialog_id, service_name=None, response=None): + return await _process(dialog_id, service_name, response) + + services, workers, session, gateway = parse_old_config(register_msg, process) if CHANNEL == 'cmd_client': endpoint = Service('cmd_responder', EventSetOutputConnector().send, @@ -154,7 +167,8 @@ def main(): input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) loop = asyncio.get_event_loop() loop.set_debug(args.debug) - register_msg, process = prepare_agent(services, endpoint, input_srv, use_response_logger=args.response_logger) + _register_msg, _process = prepare_agent(services, endpoint, input_srv, + use_response_logger=args.response_logger) future = asyncio.ensure_future(run(register_msg)) for i in workers: loop.create_task(i.call_service(process)) @@ -175,8 +189,8 @@ def main(): endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage).send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) - register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) - app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), + _register_msg, _process = prepare_agent(services, endpoint, input_srv, args.response_logger) + app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process, session), on_shutdown) web.run_app(app, port=args.port) From 5ba26ba8f56248662f42ca1076c9497b21ea980b Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 14:05:12 +0300 Subject: [PATCH 051/133] Updated run.py after merge --- core/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/run.py b/core/run.py index 84f5bdfb..d43b54ff 100644 --- a/core/run.py +++ b/core/run.py @@ -147,8 +147,8 @@ async def dialog(request): def main(): async def register_msg(utterance: str, user_telegram_id: Hashable, - user_device_type: Any, location=Any, - channel_type=str, deadline_timestamp=None, + user_device_type: Any, location: Any, + channel_type: str, deadline_timestamp=None, require_response=False, **kwargs): return await _register_msg(utterance, user_telegram_id, @@ -156,8 +156,8 @@ async def register_msg(utterance: str, user_telegram_id: Hashable, channel_type, deadline_timestamp, require_response, **kwargs) - async def process(dialog_id, service_name=None, response=None): - return await _process(dialog_id, service_name, response) + async def process(dialog_id, service_name=None, response=None, response_time: float = None): + return await _process(dialog_id, service_name, response, response_time) services, workers, session, gateway = parse_old_config(register_msg, process) From a90a71dc8cc767587e856a567e9327018b0bbbfd Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 17:36:47 +0300 Subject: [PATCH 052/133] Changed transport base classes --- core/transport/base.py | 65 ++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/core/transport/base.py b/core/transport/base.py index c8292836..b48d09db 100644 --- a/core/transport/base.py +++ b/core/transport/base.py @@ -1,66 +1,57 @@ -from typing import List, Callable, TypeVar, Union, Dict, Any, Awaitable, Optional - - -TAgentGateway = TypeVar('TAgentGateway', bound='AgentGatewayBase') -TServiceCaller = TypeVar('TServiceCaller', 
bound='ServiceCallerBase') -TServiceGateway = TypeVar('TServiceGateway', bound='ServiceGatewayBase') -TChannelConnector = TypeVar('TChannelConnector', bound='ChannelConnectorBase') -TChannelGateway = TypeVar('TChannelGateway', bound='ChannelGatewayBase') +from typing import List, Callable, TypeVar, Dict, Any class AgentGatewayBase: - _on_service_callback: Callable[[Dict], Awaitable] - _on_channel_callback: Callable[[str, str, str, bool], Awaitable] - - def __init__(self, on_service_callback: Callable[[Dict], Awaitable], - on_channel_callback: Callable[[str, str, str, bool], Awaitable], - *args, **kwargs): + _on_service_callback: Callable + _on_channel_callback: Callable + def __init__(self, on_service_callback: Callable, on_channel_callback: Callable, *args, **kwargs): super(AgentGatewayBase, self).__init__(*args, **kwargs) self._on_service_callback = on_service_callback self._on_channel_callback = on_channel_callback - async def send_to_service(self, service: str, dialog_state: dict) -> None: + async def send_to_service(self, service: str, dialog: Dict) -> None: raise NotImplementedError async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> None: raise NotImplementedError -class ServiceCallerBase: - _config: dict - _service_name: str - _formatter: Callable[[Union[List[Dict], Any], bool], Union[Any, List[Any]]] +TAgentGateway = TypeVar('TAgentGateway', bound=AgentGatewayBase) - def __init__(self, - config: dict, - formatter: Callable[[Union[List[Dict], Any], bool], Union[Any, List[Any]]]) -> None: +class ServiceGatewayConnectorBase: + _config: dict + _formatter: Callable + + def __init__(self, config: dict, formatter: Callable) -> None: self._config = config - self._service_name = config['service']['name'] self._formatter = formatter - def infer(self, dialog_states_batch: List[dict]) -> Optional[List[dict]]: + async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: raise NotImplementedError +TServiceGatewayConnectorBase = TypeVar('TServiceGatewayConnectorBase', bound=ServiceGatewayConnectorBase) + + class ServiceGatewayBase: - _service_caller: TServiceCaller + _to_service_callback: Callable - def __init__(self, service_caller: ServiceCallerBase, *args, **kwargs) -> None: + def __init__(self, to_service_callback: Callable, *args, **kwargs) -> None: super(ServiceGatewayBase, self).__init__(*args, **kwargs) - self._service_caller = service_caller + self._to_service_callback = to_service_callback + - def _infer(self, dialog_states_batch: List[dict]) -> List[dict]: - return self._service_caller.infer(dialog_states_batch) +TServiceGateway = TypeVar('TServiceGateway', bound=ServiceGatewayBase) -class ChannelConnectorBase: +class ChannelGatewayConnectorBase: _config: dict _channel_id: str - _on_channel_callback: Callable[[str, str, str, bool], Awaitable] + _on_channel_callback: Callable - def __init__(self, config: dict, on_channel_callback: Callable[[str, str, str, bool], Awaitable]) -> None: + def __init__(self, config: Dict, on_channel_callback: Callable) -> None: self._config = config self._channel_id = self._config['channel']['id'] self._on_channel_callback = on_channel_callback @@ -69,12 +60,18 @@ async def send_to_channel(self, user_id: str, response: str) -> None: raise NotImplementedError +TChannelGatewayConnectorBase = TypeVar('TChannelGatewayConnectorBase', bound=ChannelGatewayConnectorBase) + + class ChannelGatewayBase: - _to_channel_callback: Callable[[str, str], Awaitable] + _to_channel_callback: Callable - def __init__(self, to_channel_callback: 
Callable[[str, str], Awaitable], *args, **kwargs) -> None: + def __init__(self, to_channel_callback: Callable, *args, **kwargs) -> None: super(ChannelGatewayBase, self).__init__(*args, **kwargs) self._to_channel_callback = to_channel_callback async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, reset_dialog: bool) -> None: raise NotImplementedError + + +TChannelGateway = TypeVar('TChannelGateway', bound=ChannelGatewayBase) From 3d73928cb3c0788e2b7f2384867e4a42ad44e37b Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 17:37:14 +0300 Subject: [PATCH 053/133] Added GatewayHTTPConnector --- core/connectors.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/core/connectors.py b/core/connectors.py index ccd5271b..8d4c05bd 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -1,7 +1,9 @@ import asyncio import aiohttp import time -from typing import Dict, Callable +from typing import Dict, Callable, List, Any + +from core.transport.base import ServiceGatewayConnectorBase class HTTPConnector: @@ -22,6 +24,22 @@ async def send(self, payload: Dict, callback: Callable): ) +class GatewayHTTPConnector(ServiceGatewayConnectorBase): + _session: aiohttp.ClientSession + _url: str + + def __init__(self, config: dict, formatter: Callable) -> None: + super(GatewayHTTPConnector, self).__init__(config, formatter) + self._session = aiohttp.ClientSession() + self._url = config['url'] + + async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: + with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: + responses_batch = await resp.json() + + return [self._formatter(response, mode='out') for response in responses_batch] + + class AioQueueConnector: def __init__(self, queue): self.queue = queue From 4b14b4a86f118e4eb4bede855f681c9cf206e964 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 17:38:20 +0300 Subject: [PATCH 054/133] Changed transport service messages verification classes --- core/transport/messages.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/core/transport/messages.py b/core/transport/messages.py index 20123443..45ea9b50 100644 --- a/core/transport/messages.py +++ b/core/transport/messages.py @@ -1,4 +1,4 @@ -from typing import TypeVar +from typing import TypeVar, Any class MessageBase: @@ -18,13 +18,13 @@ class ServiceTaskMessage(MessageBase): msg_type = 'service_task' agent_name: str task_uuid: str - dialog_state: dict + dialog: dict - def __init__(self, agent_name: str, task_uuid: str, dialog_state: dict) -> None: + def __init__(self, agent_name: str, task_uuid: str, dialog: dict) -> None: self.msg_type = self.__class__.msg_type self.agent_name = agent_name self.task_uuid = task_uuid - self.dialog_state = dialog_state + self.dialog = dialog class ServiceResponseMessage(MessageBase): @@ -33,16 +33,19 @@ class ServiceResponseMessage(MessageBase): task_uuid: str service_name: str service_instance_id: str - partial_dialog_state: dict + dialog_id: str + response: Any + + def __init__(self, agent_name: str, task_uuid: str, service_name: str, service_instance_id: str, dialog_id: str, + response: Any) -> None: - def __init__(self, agent_name: str, task_uuid: str, service_name: str, service_instance_id: str, - partial_dialog_state: dict) -> None: self.msg_type = self.__class__.msg_type self.agent_name = agent_name self.task_uuid = task_uuid self.service_name = service_name self.service_instance_id = service_instance_id - 
self.partial_dialog_state = partial_dialog_state + self.dialog_id = dialog_id + self.response = response class ToChannelMessage(MessageBase): From 0a63a076656962228018aaee6c6df721c61c5fc0 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 2 Oct 2019 17:38:43 +0300 Subject: [PATCH 055/133] Changed rabbitmq gateways --- core/transport/gateways/rabbitmq.py | 51 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index d1b70842..ccd35a61 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -1,6 +1,5 @@ import asyncio import json -import functools import time from uuid import uuid4 from typing import Dict, List, Optional, Callable, Awaitable @@ -10,7 +9,6 @@ from aio_pika import Connection, Channel, Exchange, Queue, IncomingMessage, Message from core.transport.base import AgentGatewayBase, ServiceGatewayBase, ChannelGatewayBase -from core.transport.base import TServiceCaller from core.transport.messages import ServiceTaskMessage, ServiceResponseMessage, ToChannelMessage, FromChannelMessage from core.transport.messages import TMessageBase, get_transport_message @@ -128,8 +126,8 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: if isinstance(message_in, ServiceResponseMessage): logger.debug(f'Received service response message with task uuid {message_in.task_uuid}') - partial_dialog_state = message_in.partial_dialog_state - await self._loop.create_task(self._on_service_callback(partial_dialog_state=partial_dialog_state)) + response = message_in.response + await self._loop.create_task(self._on_service_callback(partial_dialog_state=response)) elif isinstance(message_in, FromChannelMessage): utterance = message_in.utterance @@ -142,10 +140,10 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: user_id=user_id, reset_dialog=reset_dialog)) - async def send_to_service(self, service_name: str, dialog_state: dict) -> None: + async def send_to_service(self, service_name: str, dialog: dict) -> None: task_uuid = str(uuid4()) - task = ServiceTaskMessage(agent_name=self._agent_name, task_uuid=task_uuid, dialog_state=dialog_state) - logger.debug(f'Created task {task_uuid} to service {service_name} with dialog state: {str(dialog_state)}') + task = ServiceTaskMessage(agent_name=self._agent_name, task_uuid=task_uuid, dialog=dialog) + logger.debug(f'Created task {task_uuid} to service {service_name} with dialog state: {str(dialog)}') message = Message(body=json.dumps(task.to_json()).encode('utf-8'), delivery_mode=aio_pika.DeliveryMode.PERSISTENT, @@ -171,8 +169,9 @@ async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> logger.debug(f'Published channel message: {str(channel_message_json)}') +# TODO: add separate service infer timeouts class RabbitMQServiceGateway(RabbitMQTransportBase, ServiceGatewayBase): - _service_caller: TServiceCaller + _to_service_callback: Callable _service_name: str _instance_id: str _batch_size: int @@ -180,8 +179,8 @@ class RabbitMQServiceGateway(RabbitMQTransportBase, ServiceGatewayBase): _add_to_buffer_lock: asyncio.Lock _infer_lock: asyncio.Lock - def __init__(self, config: dict, service_caller: TServiceCaller) -> None: - super(RabbitMQServiceGateway, self).__init__(config=config, service_caller=service_caller) + def __init__(self, config: dict, to_service_callback: Callable) -> None: + super(RabbitMQServiceGateway, self).__init__(config=config, 
to_service_callback=to_service_callback) self._loop = asyncio.get_event_loop() self._service_name = self._config['service']['name'] self._instance_id = self._config['service']['instance_id'] or f'{self._service_name}{str(uuid4())}' @@ -238,6 +237,7 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: tasks_batch = [ServiceTaskMessage.from_json(json.loads(message.body, encoding='utf-8')) for message in messages_batch] + # TODO: Think about proper infer errors and aknowledge handling processed_ok = await self._process_tasks(tasks_batch) if processed_ok: @@ -253,34 +253,33 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: self._infer_lock.release() async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: - task_agent_names_batch, task_uuids_batch, dialog_states_batch = \ - zip(*[(task.agent_name, task.task_uuid, task.dialog_state) for task in tasks_batch]) + task_agent_names_batch, task_uuids_batch, dialogs_batch = \ + zip(*[(task.agent_name, task.task_uuid, task.dialog) for task in tasks_batch]) logger.debug(f'Prepared for infering tasks {str(task_uuids_batch)}') - # TODO: Think about proper infer errors and aknowledge handling try: - inferer = functools.partial(self._infer, dialog_states_batch) - infer_timeout = self._config['service']['infer_timeout_sec'] - responses_batch = await asyncio.wait_for(self._loop.run_in_executor(executor=None, func=inferer), - infer_timeout) - logger.debug(f'Processed tasks {str(task_uuids_batch)}') - except asyncio.TimeoutError: - responses_batch = None + responses_batch = await asyncio.wait_for(self._to_service_callback(dialogs_batch), + self._response_timeout_sec) + + for i, response in enumerate(responses_batch): + await self._loop.create_task(self._send_results(task_agent_names_batch[i], + task_uuids_batch[i], + dialogs_batch[i]['id'], + response)) - if responses_batch: - await asyncio.wait([self._send_results(task_agent_names_batch[i], task_uuids_batch[i], partial_state) - for i, partial_state in enumerate(responses_batch)]) + logger.debug(f'Processed tasks {str(task_uuids_batch)}') return True - else: + except asyncio.TimeoutError: return False - async def _send_results(self, agent_name: str, task_uuid: str, partial_dialog_state: dict) -> None: + async def _send_results(self, agent_name: str, task_uuid: str, dialog_id: str, response: dict) -> None: result = ServiceResponseMessage(agent_name=agent_name, task_uuid=task_uuid, service_name=self._service_name, service_instance_id=self._instance_id, - partial_dialog_state=partial_dialog_state) + dialog_id=dialog_id, + response=response) message = Message(body=json.dumps(result.to_json()).encode('utf-8'), delivery_mode=aio_pika.DeliveryMode.PERSISTENT, From ebe9bb53e75adb0be40f6bb3005de806e6a0a5fe Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Wed, 2 Oct 2019 19:16:44 +0300 Subject: [PATCH 056/133] http connector fix --- core/connectors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index ccd5271b..0c524194 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -15,11 +15,11 @@ async def send(self, payload: Dict, callback: Callable): async with self.session.post(self.url, json=self.formatter([payload])) as resp: response = await resp.json() response_time = time.time() - await callback( - dialog_id=payload['id'], service_name=self.service_name, - response={self.service_name: self.formatter(response[0], mode='out')}, - response_time=response_time - ) + await 
callback( + dialog_id=payload['id'], service_name=self.service_name, + response={self.service_name: self.formatter(response[0], mode='out')}, + response_time=response_time + ) class AioQueueConnector: From f2db3b55aa1351a820b42156d04a445f0de6de90 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Wed, 2 Oct 2019 21:58:01 +0300 Subject: [PATCH 057/133] http connector session workaround --- core/connectors.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index 0c524194..5eeb0b65 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -12,9 +12,10 @@ def __init__(self, session: aiohttp.ClientSession, url: str, formatter: Callable self.service_name = service_name async def send(self, payload: Dict, callback: Callable): - async with self.session.post(self.url, json=self.formatter([payload])) as resp: - response = await resp.json() - response_time = time.time() + async with aiohttp.ClientSession() as session: + async with session.post(self.url, json=self.formatter([payload])) as resp: + response = await resp.json() + response_time = time.time() await callback( dialog_id=payload['id'], service_name=self.service_name, response={self.service_name: self.formatter(response[0], mode='out')}, From da437d66e36d1d6011d72407a4202c83faf95e46 Mon Sep 17 00:00:00 2001 From: litinsky Date: Thu, 3 Oct 2019 02:06:24 +0300 Subject: [PATCH 058/133] Refactored sending service replies to agent --- core/transport/gateways/rabbitmq.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index ccd35a61..b3ee94c2 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -12,7 +12,6 @@ from core.transport.messages import ServiceTaskMessage, ServiceResponseMessage, ToChannelMessage, FromChannelMessage from core.transport.messages import TMessageBase, get_transport_message - AGENT_IN_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_in' AGENT_OUT_EXCHANGE_NAME_TEMPLATE = '{agent_namespace}_e_out' AGENT_QUEUE_NAME_TEMPLATE = '{agent_namespace}_q_agent_{agent_name}' @@ -262,12 +261,14 @@ async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: responses_batch = await asyncio.wait_for(self._to_service_callback(dialogs_batch), self._response_timeout_sec) + results_replies = [] + for i, response in enumerate(responses_batch): - await self._loop.create_task(self._send_results(task_agent_names_batch[i], - task_uuids_batch[i], - dialogs_batch[i]['id'], - response)) + results_replies.append( + self._send_results(task_agent_names_batch[i], task_uuids_batch[i], dialogs_batch[i]['id'], response) + ) + await asyncio.gather(*results_replies) logger.debug(f'Processed tasks {str(task_uuids_batch)}') return True except asyncio.TimeoutError: From 24fef7ef486f6a48e91833ed9410dfaefc57ca04 Mon Sep 17 00:00:00 2001 From: litinsky Date: Thu, 3 Oct 2019 12:29:05 +0300 Subject: [PATCH 059/133] Decomposed agent gateway preparation --- core/config_parser.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 07f5571d..b2f3c3e4 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -12,6 +12,14 @@ from core.transport import transport_map +def prepare_agent_gateway(on_channel_callback, on_service_callback): + transport_type = HIGHLOAD_SETTINGS['transport']['type'] + gateway_cls = transport_map[transport_type]['agent'] + return 
gateway_cls(config=HIGHLOAD_SETTINGS, + on_service_callback=on_service_callback, + on_channel_callback=on_channel_callback) + + def parse_old_config(on_channel_callback, on_service_callback): services = [] worker_tasks = [] @@ -46,13 +54,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags _worker_tasks.append(QueueListenerBatchifyer(sess, u, formatter, name, queue, batch_size)) elif conf_record['protocol'] == 'highload': - if not gate: - transport_type = HIGHLOAD_SETTINGS['transport']['type'] - gateway_cls = transport_map[transport_type]['agent'] - gate = gateway_cls(config=HIGHLOAD_SETTINGS, - on_service_callback=on_service_callback, - on_channel_callback=on_channel_callback) - + gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback) connector_func = partial(gate.send_to_service, service=name) if connector_func is None: From 2a4e6d2e6657f9505ce5440da3d16000ae3c0e25 Mon Sep 17 00:00:00 2001 From: litinsky Date: Thu, 3 Oct 2019 12:30:28 +0300 Subject: [PATCH 060/133] Prepared blanks for gateway connectors --- core/connectors.py | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index 8d4c05bd..f38f2ad0 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -24,22 +24,6 @@ async def send(self, payload: Dict, callback: Callable): ) -class GatewayHTTPConnector(ServiceGatewayConnectorBase): - _session: aiohttp.ClientSession - _url: str - - def __init__(self, config: dict, formatter: Callable) -> None: - super(GatewayHTTPConnector, self).__init__(config, formatter) - self._session = aiohttp.ClientSession() - self._url = config['url'] - - async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: - with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: - responses_batch = await resp.json() - - return [self._formatter(response, mode='out') for response in responses_batch] - - class AioQueueConnector: def __init__(self, queue): self.queue = queue @@ -125,3 +109,32 @@ async def send(self, payload: Dict, callback: Callable): response_time = time.time() await callback(payload['dialog']['id'], self.service_name, " ", response_time) + + +class AgentGatewayOutputConnector: + _to_channel_callback: Callable + + def __init__(self, to_channel_callback: callable()): + self._to_channel_callback = to_channel_callback + + async def send(self, payload: Dict, **kwargs): + await self._to_channel_callback(payload) + + +class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase): + _session: aiohttp.ClientSession + _url: str + + def __init__(self, config: dict, formatter: Callable) -> None: + super(ServiceGatewayHTTPConnector, self).__init__(config, formatter) + self._session = aiohttp.ClientSession() + self._url = config['url'] + + async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: + with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: + responses_batch = await resp.json() + + return [self._formatter(response, mode='out') for response in responses_batch] + + + From 23df0645000ecfe983432aab3e92a2151bdf658b Mon Sep 17 00:00:00 2001 From: litinsky Date: Thu, 3 Oct 2019 12:35:46 +0300 Subject: [PATCH 061/133] Added blanks for different run modes to run.py --- core/run.py | 46 ++++++++++++++++++++++++----- core/transport/gateways/rabbitmq.py | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/core/run.py b/core/run.py index d43b54ff..69819720 100644 --- 
a/core/run.py +++ b/core/run.py @@ -4,7 +4,7 @@ import logging from typing import Any, Hashable -from aiohttp import web +from aiohttp import web, ClientSession from datetime import datetime from string import hexdigits @@ -14,19 +14,23 @@ from core.config_parser import parse_old_config from core.state_manager import StateManager -logger = logging.getLogger('service_logger') -logger.setLevel(logging.INFO) +service_logger = logging.getLogger('service_logger') +service_logger.setLevel(logging.INFO) fh = logging.FileHandler('../service.log') fh.setLevel(logging.INFO) -logger.addHandler(fh) +service_logger.addHandler(fh) parser = argparse.ArgumentParser() -parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, +parser.add_argument('-m', '--mode', help='run agent in default mode or as one of the high load components', + choices=['default', 'agent', 'service', 'channel']) +parser.add_argument('-ch', '--channel', help='run agent in telegram, cmd_client or http_client', type=str, choices=['cmd_client', 'http_client'], default='cmd_client') parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true') parser.add_argument('-rl', '--response-logger', help='run agent with services response logging', action='store_true') + args = parser.parse_args() +MODE = args.mode CHANNEL = args.channel @@ -36,7 +40,7 @@ def response_logger(workflow_record): send = service_data['send_time'] if not send or not done: continue - logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') + service_logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') def prepare_agent(services, endpoint: Service, input_serv: Service, use_response_logger: bool): @@ -145,7 +149,7 @@ async def dialog(request): raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') -def main(): +def run_default(): async def register_msg(utterance: str, user_telegram_id: Hashable, user_device_type: Any, location: Any, channel_type: str, deadline_timestamp=None, @@ -180,11 +184,14 @@ async def process(dialog_id, service_name=None, response=None, response_time: fl raise e finally: future.cancel() - loop.run_until_complete(session.close()) + if session: + loop.run_until_complete(session.close()) loop.stop() loop.close() logging.shutdown() elif CHANNEL == 'http_client': + if not session: + session = ClientSession() intermediate_storage = {} endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage, 'http_responder').send, StateManager.save_dialog_dict, 1, ['responder']) @@ -196,5 +203,28 @@ async def process(dialog_id, service_name=None, response=None, response_time: fl web.run_app(app, port=args.port) +def run_agent(): + raise NotImplementedError + + +def run_service(): + raise NotImplementedError + + +def run_channel(): + raise NotImplementedError + + +def main(): + if MODE == 'default': + run_default() + elif MODE == 'agent': + run_agent() + elif MODE == 'service': + run_service() + elif MODE == 'channel': + run_channel() + + if __name__ == '__main__': main() diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index b3ee94c2..981118ab 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -29,6 +29,7 @@ # TODO: add proper RabbitMQ SSL authentication # TODO: add load balancing for stateful skills +# TODO: add graceful connection close class RabbitMQTransportBase: 
_config: dict _loop: asyncio.AbstractEventLoop From db602eabcf8778ee23783ea5339e706b591c067e Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 00:31:28 +0300 Subject: [PATCH 062/133] Fixed _on_service_callback arguments in RabbitMQ agent gateway --- core/transport/gateways/rabbitmq.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index 981118ab..92bd5c61 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -126,19 +126,18 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: if isinstance(message_in, ServiceResponseMessage): logger.debug(f'Received service response message with task uuid {message_in.task_uuid}') - response = message_in.response - await self._loop.create_task(self._on_service_callback(partial_dialog_state=response)) + response_time = time.time() + await self._loop.create_task(self._on_service_callback(dialog_id=message_in.dialog_id, + service_name=message_in.service_name, + response=message_in.response, + response_time=response_time)) elif isinstance(message_in, FromChannelMessage): - utterance = message_in.utterance - channel_id = message_in.channel_id - user_id = message_in.user_id - reset_dialog = message_in.reset_dialog - logger.debug(f'Received message from channel {channel_id}, user {user_id}') - await self._loop.create_task(self._on_channel_callback(utterance=utterance, - channel_id=channel_id, - user_id=user_id, - reset_dialog=reset_dialog)) + logger.debug(f'Received message from channel {message_in.channel_id}, user {message_in.user_id}') + await self._loop.create_task(self._on_channel_callback(utterance=message_in.utterance, + channel_id=message_in.channel_id, + user_id=message_in.user_id, + reset_dialog=message_in.reset_dialog)) async def send_to_service(self, service_name: str, dialog: dict) -> None: task_uuid = str(uuid4()) From d12b89248d3a528e82492968f7b648d60fe35a1b Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:04:50 +0300 Subject: [PATCH 063/133] Added logging --- core/__init__.py | 5 +++++ core/log.py | 35 +++++++++++++++++++++++++++++++++++ core/run.py | 1 + log_config.yml | 28 ++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 core/log.py create mode 100644 log_config.yml diff --git a/core/__init__.py b/core/__init__.py index 89e51f14..30b47e66 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1 +1,6 @@ +from core.log import init_logger + + STATE_API_VERSION = "0.12" + +init_logger() diff --git a/core/log.py b/core/log.py new file mode 100644 index 00000000..29f6fc7f --- /dev/null +++ b/core/log.py @@ -0,0 +1,35 @@ +import logging +import logging.config +from pathlib import Path + +import yaml + + +def init_logger(): + agent_path = Path(__file__).resolve().parent.parent + log_config_path = agent_path / 'log_config.yaml' + + with log_config_path.open('r') as f: + log_config = yaml.safe_load(f) + + configured_loggers = [log_config.get('root', {})] + [logger for logger in + log_config.get('loggers', {}).values()] + + used_handlers = {handler for log in configured_loggers for handler in log.get('handlers', [])} + + for handler_id, handler in list(log_config['handlers'].items()): + if handler_id not in used_handlers: + del log_config['handlers'][handler_id] + elif 'filename' in handler.keys(): + filename = handler['filename'] + + if filename[0] == '~': + logfile_path = Path(filename).expanduser().resolve() + elif 
filename[0] == '/': + logfile_path = Path(filename).resolve() + else: + logfile_path = agent_path / filename + + handler['filename'] = str(logfile_path) + + logging.config.dictConfig(log_config) \ No newline at end of file diff --git a/core/run.py b/core/run.py index 69819720..6a6f6440 100644 --- a/core/run.py +++ b/core/run.py @@ -14,6 +14,7 @@ from core.config_parser import parse_old_config from core.state_manager import StateManager +# TODO move service logging configuration to log_config.yml service_logger = logging.getLogger('service_logger') service_logger.setLevel(logging.INFO) fh = logging.FileHandler('../service.log') diff --git a/log_config.yml b/log_config.yml new file mode 100644 index 00000000..0bac3052 --- /dev/null +++ b/log_config.yml @@ -0,0 +1,28 @@ +version: 1 +disable_existing_loggers: false +loggers: + core: + level: 'DEBUG' + propagate: true + handlers: ['stderr'] + service_logger: +formatters: + default: + format: "%(asctime)s.%(msecs)d %(levelname)s in '%(name)s'['%(module)s'] at line %(lineno)d: %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" +handlers: + file: + class: 'logging.FileHandler' + level: 'DEBUG' + formatter: 'default' + filename: '~/dp_agent.log' + stdout: + class: 'logging.StreamHandler' + level: 'DEBUG' + formatter: 'default' + stream: 'ext://sys.stdout' + stderr: + class: 'logging.StreamHandler' + level: 'DEBUG' + formatter: 'default' + stream: 'ext://sys.stderr' \ No newline at end of file From f477fcd12d1d11aee92544dd02543d990a0b5b56 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:30:45 +0300 Subject: [PATCH 064/133] Added highload setting --- config.py | 5 +++++ core/config_parser.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index f2a6ed10..8a2d7472 100644 --- a/config.py +++ b/config.py @@ -33,6 +33,7 @@ SKILLS = [ { "name": "odqa", + "highload": False, "protocol": "http", "host": "127.0.0.1", "port": 2080, @@ -46,6 +47,7 @@ }, { "name": "chitchat", + "highload": False, "protocol": "http", "host": "127.0.0.1", "port": 2081, @@ -63,6 +65,7 @@ ANNOTATORS_1 = [ { "name": "ner", + "highload": False, "protocol": "http", "host": "127.0.0.1", "port": 2083, @@ -79,6 +82,7 @@ ANNOTATORS_2 = [ { "name": "sentiment", + "highload": False, "protocol": "http", "host": "127.0.0.1", "port": 2084, @@ -97,6 +101,7 @@ SKILL_SELECTORS = [ { "name": "chitchat_odqa", + "highload": False, "protocol": "http", "host": "127.0.0.1", "port": 2082, diff --git a/core/config_parser.py b/core/config_parser.py index b2f3c3e4..3f279719 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -1,4 +1,6 @@ from functools import partial +from itertools import chain +from copy import deepcopy import aiohttp import asyncio @@ -39,7 +41,10 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags connector_func = None - if conf_record['protocol'] == 'http': + if conf_record['highload']: + gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback) + connector_func = partial(gate.send_to_service, service=name) + elif conf_record['protocol'] == 'http': sess = sess or aiohttp.ClientSession() if batch_size == 1 and isinstance(url, str): connector_func = HTTPConnector(sess, url, formatter, name).send @@ -53,9 +58,6 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags for u in urls: _worker_tasks.append(QueueListenerBatchifyer(sess, u, formatter, name, queue, batch_size)) - elif conf_record['protocol'] == 'highload': 
- gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback) - connector_func = partial(gate.send_to_service, service=name) if connector_func is None: raise ValueError(f'No connector function is defined while making a service {name}.') @@ -180,3 +182,23 @@ def add_bot_to_name(name): worker_tasks.extend(workers) return services, worker_tasks, session, gateway + + +def get_service_gateway_config(service_name): + matching_config = None + + for config in chain(SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, + SKILL_SELECTORS, RESPONSE_SELECTORS, POSTPROCESSORS): + config_name = config['name'] + + if config_name == service_name: + matching_config = config + break + + if not matching_config: + raise ValueError(f'Config for service {service_name} was not found') + + service_config = deepcopy(HIGHLOAD_SETTINGS) + service_config['service'] = matching_config + + return service_config From 2f52c45c39cff99f82703922d3405ecc6c12794f Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:40:03 +0300 Subject: [PATCH 065/133] Minor fix in connectors.py --- core/connectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/connectors.py b/core/connectors.py index f38f2ad0..1ea2a99f 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -114,7 +114,7 @@ async def send(self, payload: Dict, callback: Callable): class AgentGatewayOutputConnector: _to_channel_callback: Callable - def __init__(self, to_channel_callback: callable()): + def __init__(self, to_channel_callback: Callable): self._to_channel_callback = to_channel_callback async def send(self, payload: Dict, **kwargs): From b0780bc59400e45af678fd42fd9b433bec6639dd Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:46:48 +0300 Subject: [PATCH 066/133] Changed param name in ServiceGatewayConnectorBase --- core/connectors.py | 6 +++--- core/transport/base.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index 1ea2a99f..2e41fde3 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -125,10 +125,10 @@ class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase): _session: aiohttp.ClientSession _url: str - def __init__(self, config: dict, formatter: Callable) -> None: - super(ServiceGatewayHTTPConnector, self).__init__(config, formatter) + def __init__(self, service_config: dict, formatter: Callable) -> None: + super(ServiceGatewayHTTPConnector, self).__init__(service_config, formatter) self._session = aiohttp.ClientSession() - self._url = config['url'] + self._url = service_config['url'] async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: diff --git a/core/transport/base.py b/core/transport/base.py index b48d09db..e7b24944 100644 --- a/core/transport/base.py +++ b/core/transport/base.py @@ -21,11 +21,11 @@ async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> class ServiceGatewayConnectorBase: - _config: dict + _service_config: dict _formatter: Callable - def __init__(self, config: dict, formatter: Callable) -> None: - self._config = config + def __init__(self, service_config: dict, formatter: Callable) -> None: + self._service_config = service_config self._formatter = formatter async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: From d16dd2db58c3bbf8d5b2b8a4b6c1677fbd6fc32b Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:47:38 +0300 Subject: [PATCH 067/133] 
Small fixes in config_parser.py --- core/config_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 3f279719..29896539 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -198,7 +198,7 @@ def get_service_gateway_config(service_name): if not matching_config: raise ValueError(f'Config for service {service_name} was not found') - service_config = deepcopy(HIGHLOAD_SETTINGS) - service_config['service'] = matching_config + gateway_config = deepcopy(HIGHLOAD_SETTINGS) + gateway_config['service'] = matching_config - return service_config + return gateway_config From 3374c027798d4ca3e132e440ca7c76e04f999173 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 01:57:11 +0300 Subject: [PATCH 068/133] Removed redundand class attribute doc --- core/transport/gateways/rabbitmq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index 92bd5c61..042ee7b6 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -170,7 +170,6 @@ async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> # TODO: add separate service infer timeouts class RabbitMQServiceGateway(RabbitMQTransportBase, ServiceGatewayBase): - _to_service_callback: Callable _service_name: str _instance_id: str _batch_size: int From d8c412db99bf7ee440488da31c43853d7ceac044 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:07:36 +0300 Subject: [PATCH 069/133] Added service run mode --- core/run.py | 20 ++++++++++++++++++-- core/transport/__init__.py | 8 ++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/core/run.py b/core/run.py index 6a6f6440..f53e7af3 100644 --- a/core/run.py +++ b/core/run.py @@ -11,8 +11,9 @@ from core.agent import Agent from core.pipeline import Pipeline, Service from core.connectors import EventSetOutputConnector, HttpOutputConnector -from core.config_parser import parse_old_config +from core.config_parser import parse_old_config, get_service_gateway_config from core.state_manager import StateManager +from core.transport import gateways_map, connectors_map # TODO move service logging configuration to log_config.yml service_logger = logging.getLogger('service_logger') @@ -24,6 +25,7 @@ parser = argparse.ArgumentParser() parser.add_argument('-m', '--mode', help='run agent in default mode or as one of the high load components', choices=['default', 'agent', 'service', 'channel']) +parser.add_argument('-n', '--service-name', help='service name for service run mode', type=str) parser.add_argument('-ch', '--channel', help='run agent in telegram, cmd_client or http_client', type=str, choices=['cmd_client', 'http_client'], default='cmd_client') parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) @@ -209,7 +211,21 @@ def run_agent(): def run_service(): - raise NotImplementedError + service_name = args.service_name + gateway_config = get_service_gateway_config(service_name) + service_config = gateway_config['service'] + + formatter = service_config['formatter'] + connector_type = service_config['protocol'] + connector_cls = connectors_map[connector_type] + connector = connector_cls(service_config=service_config, formatter=formatter) + + transport_type = gateway_config['transport']['type'] + gateway_cls = gateways_map[transport_type]['service'] + _gateway = gateway_cls(config=gateway_config, 
service_caller=connector.send_to_service) + + loop = asyncio.get_event_loop() + loop.run_forever() def run_channel(): diff --git a/core/transport/__init__.py b/core/transport/__init__.py index 5058b6e1..79faf800 100644 --- a/core/transport/__init__.py +++ b/core/transport/__init__.py @@ -1,10 +1,14 @@ from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway +from core.connectors import ServiceGatewayHTTPConnector - -transport_map = { +gateways_map = { 'rabbitmq': { 'agent': RabbitMQAgentGateway, 'service': RabbitMQServiceGateway, 'channel': RabbitMQChannelGateway } +} + +connectors_map = { + 'http': ServiceGatewayHTTPConnector } \ No newline at end of file From cf8897189c38c909d434cf4b3e39faf39dbcfde3 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:12:27 +0300 Subject: [PATCH 070/133] Fixed wrong filename in log.py --- core/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/log.py b/core/log.py index 29f6fc7f..575ef238 100644 --- a/core/log.py +++ b/core/log.py @@ -7,7 +7,7 @@ def init_logger(): agent_path = Path(__file__).resolve().parent.parent - log_config_path = agent_path / 'log_config.yaml' + log_config_path = agent_path / 'log_config.yml' with log_config_path.open('r') as f: log_config = yaml.safe_load(f) From 4942936130594f212aa2207b531d53af133f0e59 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:15:42 +0300 Subject: [PATCH 071/133] Fixed bug in log config --- log_config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/log_config.yml b/log_config.yml index 0bac3052..b45824e4 100644 --- a/log_config.yml +++ b/log_config.yml @@ -5,7 +5,6 @@ loggers: level: 'DEBUG' propagate: true handlers: ['stderr'] - service_logger: formatters: default: format: "%(asctime)s.%(msecs)d %(levelname)s in '%(name)s'['%(module)s'] at line %(lineno)d: %(message)s" From 3714ac82ed8f438aca2d6c8b39de6aa8523eb13d Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:28:39 +0300 Subject: [PATCH 072/133] Fixed instance id handling --- core/transport/gateways/rabbitmq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index 042ee7b6..c0e68d6f 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -181,7 +181,7 @@ def __init__(self, config: dict, to_service_callback: Callable) -> None: super(RabbitMQServiceGateway, self).__init__(config=config, to_service_callback=to_service_callback) self._loop = asyncio.get_event_loop() self._service_name = self._config['service']['name'] - self._instance_id = self._config['service']['instance_id'] or f'{self._service_name}{str(uuid4())}' + self._instance_id = self._config['service'].get('instance_id', None) or f'{self._service_name}{str(uuid4())}' self._batch_size = self._config['service']['batch_size'] self._incoming_messages_buffer = [] From 48484bf00153e283d2beb21c4bec3c256a466033 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:29:26 +0300 Subject: [PATCH 073/133] Added fixes for service mode --- core/__init__.py | 14 ++++++++++++++ core/config_parser.py | 4 ++-- core/run.py | 4 ++-- core/transport/__init__.py | 14 -------------- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/core/__init__.py b/core/__init__.py index 30b47e66..ea9ba0e9 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,6 +1,20 @@ +from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, 
RabbitMQServiceGateway, RabbitMQChannelGateway +from core.connectors import ServiceGatewayHTTPConnector from core.log import init_logger STATE_API_VERSION = "0.12" init_logger() + +gateways_map = { + 'rabbitmq': { + 'agent': RabbitMQAgentGateway, + 'service': RabbitMQServiceGateway, + 'channel': RabbitMQChannelGateway + } +} + +connectors_map = { + 'http': ServiceGatewayHTTPConnector +} \ No newline at end of file diff --git a/core/config_parser.py b/core/config_parser.py index 29896539..d82c4100 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -11,12 +11,12 @@ QueueListenerBatchifyer from core.pipeline import Service, simple_workflow_formatter from core.state_manager import StateManager -from core.transport import transport_map +from core import gateways_map def prepare_agent_gateway(on_channel_callback, on_service_callback): transport_type = HIGHLOAD_SETTINGS['transport']['type'] - gateway_cls = transport_map[transport_type]['agent'] + gateway_cls = gateways_map[transport_type]['agent'] return gateway_cls(config=HIGHLOAD_SETTINGS, on_service_callback=on_service_callback, on_channel_callback=on_channel_callback) diff --git a/core/run.py b/core/run.py index f53e7af3..4e08fdf6 100644 --- a/core/run.py +++ b/core/run.py @@ -13,7 +13,7 @@ from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config, get_service_gateway_config from core.state_manager import StateManager -from core.transport import gateways_map, connectors_map +from core import gateways_map, connectors_map # TODO move service logging configuration to log_config.yml service_logger = logging.getLogger('service_logger') @@ -222,7 +222,7 @@ def run_service(): transport_type = gateway_config['transport']['type'] gateway_cls = gateways_map[transport_type]['service'] - _gateway = gateway_cls(config=gateway_config, service_caller=connector.send_to_service) + _gateway = gateway_cls(config=gateway_config, to_service_callback=connector.send_to_service) loop = asyncio.get_event_loop() loop.run_forever() diff --git a/core/transport/__init__.py b/core/transport/__init__.py index 79faf800..e69de29b 100644 --- a/core/transport/__init__.py +++ b/core/transport/__init__.py @@ -1,14 +0,0 @@ -from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway -from core.connectors import ServiceGatewayHTTPConnector - -gateways_map = { - 'rabbitmq': { - 'agent': RabbitMQAgentGateway, - 'service': RabbitMQServiceGateway, - 'channel': RabbitMQChannelGateway - } -} - -connectors_map = { - 'http': ServiceGatewayHTTPConnector -} \ No newline at end of file From c65e57ac6f64e1c1a76fc7a8a368f58a4f40ef6a Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:48:32 +0300 Subject: [PATCH 074/133] Added default mode --- core/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/run.py b/core/run.py index 4e08fdf6..f75d6770 100644 --- a/core/run.py +++ b/core/run.py @@ -24,7 +24,7 @@ parser = argparse.ArgumentParser() parser.add_argument('-m', '--mode', help='run agent in default mode or as one of the high load components', - choices=['default', 'agent', 'service', 'channel']) + default='default', choices=['default', 'agent', 'service', 'channel']) parser.add_argument('-n', '--service-name', help='service name for service run mode', type=str) parser.add_argument('-ch', '--channel', help='run agent in telegram, cmd_client or http_client', type=str, choices=['cmd_client', 'http_client'], 
default='cmd_client') From afd755bb7aae6953bf018f6fc64782601c885d05 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:49:12 +0300 Subject: [PATCH 075/133] Fixed highload connector_func generation --- core/config_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/config_parser.py b/core/config_parser.py index d82c4100..d3081b5b 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -43,7 +43,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags if conf_record['highload']: gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback) - connector_func = partial(gate.send_to_service, service=name) + connector_func = partial(gate.send_to_service, service_name=name) elif conf_record['protocol'] == 'http': sess = sess or aiohttp.ClientSession() if batch_size == 1 and isinstance(url, str): From e7d15a70c18bf8f9cea579475b2e4bfd36f49b12 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 02:50:28 +0300 Subject: [PATCH 076/133] Added batch_size param --- config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config.py b/config.py index 8a2d7472..159352ab 100644 --- a/config.py +++ b/config.py @@ -34,6 +34,7 @@ { "name": "odqa", "highload": False, + "batch_size": 1, "protocol": "http", "host": "127.0.0.1", "port": 2080, @@ -48,6 +49,7 @@ { "name": "chitchat", "highload": False, + "batch_size": 1, "protocol": "http", "host": "127.0.0.1", "port": 2081, @@ -66,6 +68,7 @@ { "name": "ner", "highload": False, + "batch_size": 1, "protocol": "http", "host": "127.0.0.1", "port": 2083, @@ -83,6 +86,7 @@ { "name": "sentiment", "highload": False, + "batch_size": 1, "protocol": "http", "host": "127.0.0.1", "port": 2084, @@ -102,6 +106,7 @@ { "name": "chitchat_odqa", "highload": False, + "batch_size": 1, "protocol": "http", "host": "127.0.0.1", "port": 2082, From 81847a92478049196789e3159a33c9d90079dc0d Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 4 Oct 2019 10:00:35 +0300 Subject: [PATCH 077/133] async telegram client initial commit --- core/run.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- dockerfile_agent | 14 +++++++++++--- requirements.txt | 5 ++++- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/core/run.py b/core/run.py index 27e5124c..49ccded8 100644 --- a/core/run.py +++ b/core/run.py @@ -6,6 +6,7 @@ from aiohttp import web from datetime import datetime from string import hexdigits +from os import getenv from core.agent import Agent from core.pipeline import Pipeline, Service @@ -13,6 +14,11 @@ from core.config_parser import parse_old_config from core.state_manager import StateManager +from aiogram import Bot +from aiogram.utils import executor +from aiogram.dispatcher import Dispatcher + + logger = logging.getLogger('service_logger') logger.setLevel(logging.INFO) fh = logging.FileHandler('../service.log') @@ -22,7 +28,7 @@ parser = argparse.ArgumentParser() parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, - choices=['cmd_client', 'http_client'], default='cmd_client') + choices=['cmd_client', 'http_client', 'telegram'], default='cmd_client') parser.add_argument('-p', '--port', help='port for http client, default 4242', default=4242) parser.add_argument('-d', '--debug', help='run in debug mode', action='store_true') parser.add_argument('-rl', '--response-logger', help='run agent with services response logging', action='store_true') @@ -62,6 +68,21 @@ async def run(register_msg): print('Bot: ', 
response['dialog']['utterances'][-1]['text']) +class TelegramMessageProcessor: + def __init__(self, register_msg): + self.register_msg = register_msg + + async def handle_message(self, message): + response = await self.register_msg( + utterance=message.text, + user_telegram_id=str(message.from_user.id), + user_device_type='telegram', + date_time=datetime.now(), location='', channel_type='telegram', + require_response=True + ) + await message.answer(response['dialog']['utterances'][-1]['text']) + + async def on_shutdown(app): await app['client_session'].close() @@ -181,6 +202,28 @@ def main(): web.run_app(app, port=args.port) + elif CHANNEL == 'telegram': + token = getenv('TELEGRAM_TOKEN') + proxy = getenv('TELEGRAM_PROXY') + + loop = asyncio.get_event_loop() + + bot = Bot(token=token, loop=loop, proxy=proxy) + dp = Dispatcher(bot) + endpoint = Service('telegram_responder', EventSetOutputConnector('telegram_responder').send, + StateManager.save_dialog_dict, 1, ['responder']) + input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) + register_msg, process = prepare_agent( + services, endpoint, input_srv, use_response_logger=args.response_logger) + + for i in workers: + loop.create_task(i.call_service(process)) + tg_msg_processor = TelegramMessageProcessor(register_msg) + + dp.message_handler()(tg_msg_processor.handle_message) + + executor.start_polling(dp, skip_updates=True) + if __name__ == '__main__': main() diff --git a/dockerfile_agent b/dockerfile_agent index e13fdb22..e6684070 100644 --- a/dockerfile_agent +++ b/dockerfile_agent @@ -1,7 +1,15 @@ -FROM ubuntu:latest +FROM python:3.7-slim-stretch + +ENV DEBIAN_FRONTEND noninteractive RUN apt-get update -y --fix-missing && \ - apt-get install -y python3 python3-pip python3-dev build-essential git openssl + apt-get install -y -q \ + build-essential \ + openssl \ + git \ + libssl-dev \ + libffi-dev && \ + rm -rf /var/lib/apt/lists/* ENV PYTHONIOENCODING=utf-8 @@ -17,4 +25,4 @@ ENV LC_ALL C.UTF-8 ENV PYTHONPATH "${PYTONPATH}:/dp-agent" ENV DPA_LAUNCHING_ENV "docker" -EXPOSE 4242 \ No newline at end of file +EXPOSE 4242 diff --git a/requirements.txt b/requirements.txt index 6d470c25..d342a733 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ pytelegrambotapi==3.5.2 mongoengine==0.17.0 -aiohttp==3.5.4 +aiogram==2.3 +aiohttp==3.6.1 +aiohttp-socks==0.2.2 aiohttp-swagger==1.0.9 pyyaml==5.1 +aiogram==2.3 From c9db808ef8ad60267c7fe572ab9ca821599e0438 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 14:00:04 +0300 Subject: [PATCH 078/133] Change service and channel incoming messages logging to more expanded --- core/transport/gateways/rabbitmq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index c0e68d6f..ba3e573c 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -125,7 +125,7 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: await message.ack() if isinstance(message_in, ServiceResponseMessage): - logger.debug(f'Received service response message with task uuid {message_in.task_uuid}') + logger.debug(f'Received service response message {str(message_in.to_json())}') response_time = time.time() await self._loop.create_task(self._on_service_callback(dialog_id=message_in.dialog_id, service_name=message_in.service_name, @@ -133,7 +133,7 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: 
response_time=response_time)) elif isinstance(message_in, FromChannelMessage): - logger.debug(f'Received message from channel {message_in.channel_id}, user {message_in.user_id}') + logger.debug(f'Received message from channel {str(message_in.to_json())}') await self._loop.create_task(self._on_channel_callback(utterance=message_in.utterance, channel_id=message_in.channel_id, user_id=message_in.user_id, From ed0492359242a18f1dbaf464730a82bd696f28ac Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 14:06:16 +0300 Subject: [PATCH 079/133] Added AgentGatewayToServiceConnector, fixed ServiceGatewayHTTPConnector --- core/config_parser.py | 6 +++--- core/connectors.py | 27 ++++++++++++++++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index d3081b5b..d51a2d14 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -1,4 +1,3 @@ -from functools import partial from itertools import chain from copy import deepcopy @@ -8,7 +7,7 @@ from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, \ RESPONSE_SELECTORS, POSTPROCESSORS, HIGHLOAD_SETTINGS from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \ - QueueListenerBatchifyer + QueueListenerBatchifyer, AgentGatewayToServiceConnector from core.pipeline import Service, simple_workflow_formatter from core.state_manager import StateManager from core import gateways_map @@ -43,7 +42,8 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags if conf_record['highload']: gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback) - connector_func = partial(gate.send_to_service, service_name=name) + connector_func = AgentGatewayToServiceConnector(to_service_callback=gate.send_to_service, + service_name=name).send elif conf_record['protocol'] == 'http': sess = sess or aiohttp.ClientSession() if batch_size == 1 and isinstance(url, str): diff --git a/core/connectors.py b/core/connectors.py index 2e41fde3..b671c3dd 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -111,30 +111,35 @@ async def send(self, payload: Dict, callback: Callable): " ", response_time) -class AgentGatewayOutputConnector: - _to_channel_callback: Callable +class AgentGatewayToChannelConnector: + pass - def __init__(self, to_channel_callback: Callable): - self._to_channel_callback = to_channel_callback - async def send(self, payload: Dict, **kwargs): - await self._to_channel_callback(payload) +class AgentGatewayToServiceConnector: + _to_service_callback: Callable + _service_name: str + + def __init__(self, to_service_callback: Callable, service_name: str): + self._to_service_callback = to_service_callback + self._service_name = service_name + + async def send(self, payload: Dict, **_kwargs): + await self._to_service_callback(dialog=payload, service_name=self._service_name) class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase): _session: aiohttp.ClientSession _url: str + _service_name: str def __init__(self, service_config: dict, formatter: Callable) -> None: super(ServiceGatewayHTTPConnector, self).__init__(service_config, formatter) self._session = aiohttp.ClientSession() self._url = service_config['url'] + self._service_name = service_config['name'] async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: - with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: + async with await self._session.post(self._url, 
json=self._formatter(dialogs)) as resp: responses_batch = await resp.json() - return [self._formatter(response, mode='out') for response in responses_batch] - - - + return [{self._service_name: self._formatter(response, mode='out')} for response in responses_batch] From e4abcb86f40447c660e4f49b7dad87333e2537a4 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 4 Oct 2019 14:27:29 +0300 Subject: [PATCH 080/133] cleaned dockerfile and requirements --- dockerfile_agent | 2 +- requirements.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dockerfile_agent b/dockerfile_agent index e6684070..517997c5 100644 --- a/dockerfile_agent +++ b/dockerfile_agent @@ -14,7 +14,7 @@ RUN apt-get update -y --fix-missing && \ ENV PYTHONIOENCODING=utf-8 COPY requirements.txt / -RUN pip3 install -r requirements.txt +RUN pip install -r requirements.txt RUN mkdir dp-agent WORKDIR /dp-agent diff --git a/requirements.txt b/requirements.txt index d342a733..7624066c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -pytelegrambotapi==3.5.2 mongoengine==0.17.0 aiogram==2.3 aiohttp==3.6.1 From f3601bb20d2c2c4f6408bf456e6c294b7c7524c9 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 4 Oct 2019 14:37:13 +0300 Subject: [PATCH 081/133] docs: update a python version and agent running command --- docs/source/intro/overview.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/intro/overview.rst b/docs/source/intro/overview.rst index 7259caaa..a018d695 100644 --- a/docs/source/intro/overview.rst +++ b/docs/source/intro/overview.rst @@ -107,7 +107,7 @@ Also you can include in the Agent configuration any external service running on Services Deployment =================== -1. Create a new **Python 3.6.7** virtual environment. +1. Create a new **Python 3.7.4** virtual environment. 2. Install requirements for Docker config generator: @@ -192,7 +192,7 @@ Agent can run both from container and from a local machine. The default Agent po .. code:: bash - python3 -m core.run + python -m core.run **Local machine** @@ -221,13 +221,13 @@ Agent can run both from container and from a local machine. The default Agent po .. code:: bash - python3 -m core.run + python -m core.run or via the Telegram: .. code:: bash - python3 -m core.run -ch telegram + python -m core.run -ch telegram **HTTP api server** @@ -235,7 +235,7 @@ Agent can run both from container and from a local machine. The default Agent po .. 
code:: bash - python3 -m core.run -ch http_client [-p 4242] + python -m core.run -ch http_client [-p 4242] In both cases api will be accessible on your localhost From 9c55281bb56512be63eae779c11d903ccf3f7f88 Mon Sep 17 00:00:00 2001 From: litinsky Date: Fri, 4 Oct 2019 14:39:57 +0300 Subject: [PATCH 082/133] Minor fixes in config.py --- config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 159352ab..b86fd118 100644 --- a/config.py +++ b/config.py @@ -7,11 +7,12 @@ DB_PORT = getenv('DB_PORT', 27017) DB_PATH = getenv('DB_PATH', '/data/db') +# TODO: move response timeout to transport settings HIGHLOAD_SETTINGS = { 'agent_namespace': 'deeppavlov_agent', 'agent': { - 'agent_name': 'dp_agent', - 'response_timeout': 120 + 'name': 'dp_agent', + 'response_timeout_sec': 120 }, 'channels': {}, 'transport': { From 5f2ea5c80607165438cd94f201ac9c5da4e060d2 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 4 Oct 2019 15:13:12 +0300 Subject: [PATCH 083/133] docs: update running with GPU section --- docs/source/intro/overview.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/source/intro/overview.rst b/docs/source/intro/overview.rst index a018d695..29e602b9 100644 --- a/docs/source/intro/overview.rst +++ b/docs/source/intro/overview.rst @@ -115,10 +115,17 @@ Services Deployment pip -r install gen_requirements.txt -3. Install and configure Docker_ and Docker-compose_ (version 1.19.0 or later). +3. Install and configure Docker_ (version 19.03.2 or later) and Docker-compose_ (version 1.19.0 or later). 4. (optional) Install nvidia-docker_ if you wish to run some services on GPU. + To be able to run GPU-based docker files please make sure about two things on your host system: + + * Your nvidia driver has to support the CUDA version installed in the GPU-based docker file. + * Please notice that ``docker-compose.yml`` of **3.7** version doesn't officially support `runtime: nvidia` + option anymore, so you have to manually edit ``/etc/docker/daemon.json`` on your system. Read in the + nvidia-container-runtime_ documentation how to do it. + 5. Create a directory for storing downloaded data, such as pre-trained models. It should be located outside the agent project's home directory. @@ -396,4 +403,5 @@ a list of utterances as input. Use the existing ``utils/ru_test_phrases.py`` or .. _Docker environment configuration: https://github.com/deepmipt/dp-agent/blob/master/docker-compose.yml .. _docker-exec: https://docs.docker.com/engine/reference/commandline/exec/ .. _state: https://deeppavlov-agent.readthedocs.io/en/latest/_static/api.html -.. _mongo-docs: https://docs.mongodb.com/manual/tutorial/manage-mongodb-processes/ \ No newline at end of file +.. _mongo-docs: https://docs.mongodb.com/manual/tutorial/manage-mongodb-processes/ +.. 
From 05bb004ed3cbbe8f5edb65688fe11277fd8dfbe9 Mon Sep 17 00:00:00 2001
From: litinsky
Date: Fri, 4 Oct 2019 19:06:25 +0300
Subject: [PATCH 084/133] Refactored gateway callback wiring in run.py

---
 config.py                           |  1 +
 core/config_parser.py               |  6 +++---
 core/run.py                         | 33 ++++++++++++-----------------
 core/transport/base.py              | 26 +++++++++++++++++++----
 core/transport/gateways/rabbitmq.py |  6 +++---
 5 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/config.py b/config.py
index b86fd118..94d34476 100644
--- a/config.py
+++ b/config.py
@@ -8,6 +8,7 @@
 DB_PATH = getenv('DB_PATH', '/data/db')
 
 # TODO: move response timeout to transport settings
+# TODO: change naming to transport settings
 HIGHLOAD_SETTINGS = {
     'agent_namespace': 'deeppavlov_agent',
     'agent': {
diff --git a/core/config_parser.py b/core/config_parser.py
index d51a2d14..57cce68f 100644
--- a/core/config_parser.py
+++ b/core/config_parser.py
@@ -13,7 +13,7 @@
 from core import gateways_map
 
 
-def prepare_agent_gateway(on_channel_callback, on_service_callback):
+def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None):
     transport_type = HIGHLOAD_SETTINGS['transport']['type']
     gateway_cls = gateways_map[transport_type]['agent']
     return gateway_cls(config=HIGHLOAD_SETTINGS,
@@ -21,7 +21,7 @@ def prepare_agent_gateway(on_channel_callback, on_service_callback):
                        on_channel_callback=on_channel_callback)
 
 
-def parse_old_config(on_channel_callback, on_service_callback):
+def parse_old_config():
     services = []
     worker_tasks = []
     session = None
@@ -41,7 +41,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags
         connector_func = None
 
         if conf_record['highload']:
-            gate = gate or prepare_agent_gateway(on_channel_callback, on_service_callback)
+            gate = gate or prepare_agent_gateway()
             connector_func = AgentGatewayToServiceConnector(to_service_callback=gate.send_to_service,
                                                             service_name=name).send
         elif conf_record['protocol'] == 'http':
diff --git a/core/run.py b/core/run.py
index e99c966e..41dee61d 100644
--- a/core/run.py
+++ b/core/run.py
@@ -20,7 +20,6 @@
 from core.state_manager import StateManager
 from core import gateways_map, connectors_map
 
-
 # TODO move service logging configuration to log_config.yml
 service_logger = logging.getLogger('service_logger')
 service_logger.setLevel(logging.INFO)
@@ -175,20 +174,7 @@ async def dialog(request):
 
 
 def run_default():
-    async def register_msg(utterance: str, user_telegram_id: Hashable,
-                           user_device_type: Any, location: Any,
-                           channel_type: str, deadline_timestamp=None,
-                           require_response=False, **kwargs):
-
-        return await _register_msg(utterance, user_telegram_id,
-                                   user_device_type, location,
-                                   channel_type, deadline_timestamp,
-                                   require_response, **kwargs)
-
-    async def process(dialog_id, service_name=None, response=None, response_time: float = None):
-        return await _process(dialog_id, service_name, response, response_time)
-
-
     services, workers, session, gateway = parse_old_config()
 
     if CHANNEL == 'cmd_client':
         endpoint = Service('cmd_responder', EventSetOutputConnector('cmd_responder').send,
                            StateManager.save_dialog_dict, 1, ['responder'])
         input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input'])
         loop = asyncio.get_event_loop()
         loop.set_debug(args.debug)
-        _register_msg, _process = 
prepare_agent(services, endpoint, input_srv, - use_response_logger=args.response_logger) + register_msg, process = prepare_agent(services, endpoint, input_srv, use_response_logger=args.response_logger) + if gateway: + gateway.on_channel_callback = register_msg + gateway.on_service_callback = process future = asyncio.ensure_future(run(register_msg)) for i in workers: loop.create_task(i.call_service(process)) @@ -221,7 +209,10 @@ async def process(dialog_id, service_name=None, response=None, response_time: fl endpoint = Service('http_responder', HttpOutputConnector(intermediate_storage, 'http_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) - _register_msg, _process = prepare_agent(services, endpoint, input_srv, args.response_logger) + register_msg, process = prepare_agent(services, endpoint, input_srv, args.response_logger) + if gateway: + gateway.on_channel_callback = register_msg + gateway.on_service_callback = process app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process, session), on_shutdown) @@ -238,9 +229,11 @@ async def process(dialog_id, service_name=None, response=None, response_time: fl endpoint = Service('telegram_responder', EventSetOutputConnector('telegram_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) - _register_msg, _process = prepare_agent( + register_msg, process = prepare_agent( services, endpoint, input_srv, use_response_logger=args.response_logger) - + if gateway: + gateway.on_channel_callback = register_msg + gateway.on_service_callback = process for i in workers: loop.create_task(i.call_service(process)) tg_msg_processor = TelegramMessageProcessor(register_msg) diff --git a/core/transport/base.py b/core/transport/base.py index e7b24944..cfa2ddf9 100644 --- a/core/transport/base.py +++ b/core/transport/base.py @@ -1,15 +1,33 @@ -from typing import List, Callable, TypeVar, Dict, Any +from typing import List, Callable, TypeVar, Dict, Any, Optional class AgentGatewayBase: - _on_service_callback: Callable - _on_channel_callback: Callable + _on_service_callback: Optional[Callable] + _on_channel_callback: Optional[Callable] + + def __init__(self, on_service_callback: Optional[Callable] = None, + on_channel_callback: Optional[Callable] = None, *args, **kwargs): - def __init__(self, on_service_callback: Callable, on_channel_callback: Callable, *args, **kwargs): super(AgentGatewayBase, self).__init__(*args, **kwargs) self._on_service_callback = on_service_callback self._on_channel_callback = on_channel_callback + @property + def on_service_callback(self): + return self._on_service_callback + + @on_service_callback.setter + def on_service_callback(self, callback: Callable): + self._on_service_callback = callback + + @property + def on_channel_callback(self): + return self._on_channel_callback + + @on_channel_callback.setter + def on_channel_callback(self, callback: Callable): + self._on_channel_callback = callback + async def send_to_service(self, service: str, dialog: Dict) -> None: raise NotImplementedError diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index ba3e573c..6940c3d0 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -95,8 +95,8 @@ class RabbitMQAgentGateway(RabbitMQTransportBase, AgentGatewayBase): _service_responses: Dict[str, 
dict] def __init__(self, config: dict, - on_service_callback: Callable[[Dict], Awaitable], - on_channel_callback: Callable[[str, str, str, bool], Awaitable]) -> None: + on_service_callback: Optional[Callable] = None, + on_channel_callback: Optional[Callable] = None) -> None: super(RabbitMQAgentGateway, self).__init__(config=config, on_service_callback=on_service_callback, @@ -294,7 +294,7 @@ class RabbitMQChannelGateway(RabbitMQTransportBase, ChannelGatewayBase): _agent_name: str _channel_id: str - def __init__(self, config: dict, to_channel_callback: Callable[[str, str], Awaitable]) -> None: + def __init__(self, config: dict, to_channel_callback: Callable) -> None: super(RabbitMQChannelGateway, self).__init__(config=config, to_channel_callback=to_channel_callback) self._loop = asyncio.get_event_loop() self._agent_name = self._config['agent']['name'] From f21a29bf7330641d39b075a2b881d3854ff8836a Mon Sep 17 00:00:00 2001 From: litinsky Date: Sat, 5 Oct 2019 11:55:06 +0300 Subject: [PATCH 085/133] Moved service_logger config to log_config.yml --- core/run.py | 7 +------ log_config.yml | 12 +++++++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/core/run.py b/core/run.py index 41dee61d..536b0d9d 100644 --- a/core/run.py +++ b/core/run.py @@ -20,15 +20,10 @@ from core.state_manager import StateManager from core import gateways_map, connectors_map -# TODO move service logging configuration to log_config.yml + service_logger = logging.getLogger('service_logger') -service_logger.setLevel(logging.INFO) -fh = logging.FileHandler('../service.log') -fh.setLevel(logging.INFO) -service_logger.addHandler(fh) parser = argparse.ArgumentParser() - parser.add_argument('-m', '--mode', help='run agent in default mode or as one of the high load components', default='default', choices=['default', 'agent', 'service', 'channel']) parser.add_argument('-n', '--service-name', help='service name for service run mode', type=str) diff --git a/log_config.yml b/log_config.yml index b45824e4..bdb1a11d 100644 --- a/log_config.yml +++ b/log_config.yml @@ -5,6 +5,11 @@ loggers: level: 'DEBUG' propagate: true handlers: ['stderr'] + service_logger: + level: 'INFO' + propagate: true + handlers: ['file_service_logger'] + formatters: default: format: "%(asctime)s.%(msecs)d %(levelname)s in '%(name)s'['%(module)s'] at line %(lineno)d: %(message)s" @@ -24,4 +29,9 @@ handlers: class: 'logging.StreamHandler' level: 'DEBUG' formatter: 'default' - stream: 'ext://sys.stderr' \ No newline at end of file + stream: 'ext://sys.stderr' + file_service_logger: + class: 'logging.FileHandler' + level: 'DEBUG' + formatter: 'default' + filename: 'service.log' \ No newline at end of file From d92c507fee0725c1ff10c410cfa780cf8b233525 Mon Sep 17 00:00:00 2001 From: litinsky Date: Sat, 5 Oct 2019 18:21:55 +0300 Subject: [PATCH 086/133] Changed url param name in ServiceGatewayHTTPConnector --- core/connectors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/connectors.py b/core/connectors.py index 9d3b4a20..08e5c3df 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -136,9 +136,11 @@ class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase): def __init__(self, service_config: dict, formatter: Callable) -> None: super(ServiceGatewayHTTPConnector, self).__init__(service_config, formatter) self._session = aiohttp.ClientSession() - self._url = service_config['url'] self._service_name = service_config['name'] + default_infer_url = 'http://127.0.0.1:5000/model' + self._url = 
service_config.get('infer_url', default_infer_url)
+
     async def send_to_service(self, dialogs: List[Dict]) -> List[Any]:
         async with await self._session.post(self._url, json=self._formatter(dialogs)) as resp:
             responses_batch = await resp.json()

From eb4b8983678f5db5d39f346990006c26f5858b09 Mon Sep 17 00:00:00 2001
From: litinsky
Date: Sat, 5 Oct 2019 18:28:05 +0300
Subject: [PATCH 087/133] Changed protocol naming from rabbitmq to AMQP

---
 config.py                           |  9 ++-------
 core/__init__.py                    |  2 +-
 core/config_parser.py               | 11 ++++++-----
 core/transport/gateways/rabbitmq.py | 12 ++++++------
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/config.py b/config.py
index 94d34476..2f4de8f3 100644
--- a/config.py
+++ b/config.py
@@ -17,8 +17,8 @@
     },
     'channels': {},
     'transport': {
-        'type': 'rabbitmq',
-        'rabbitmq': {
+        'type': 'AMQP',
+        'AMQP': {
             'host': '127.0.0.1',
             'port': 5672,
             'login': 'guest',
@@ -35,7 +35,6 @@
 SKILLS = [
     {
         "name": "odqa",
-        "highload": False,
         "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
@@ -50,7 +49,6 @@
     },
     {
         "name": "chitchat",
-        "highload": False,
         "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
@@ -69,7 +67,6 @@
 ANNOTATORS_1 = [
     {
         "name": "ner",
-        "highload": False,
         "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
@@ -87,7 +84,6 @@
 ANNOTATORS_2 = [
     {
         "name": "sentiment",
-        "highload": False,
        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
@@ -107,7 +103,6 @@
 SKILL_SELECTORS = [
     {
         "name": "chitchat_odqa",
-        "highload": False,
         "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
diff --git a/core/__init__.py b/core/__init__.py
index ea9ba0e9..eba2547f 100644
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -8,7 +8,7 @@
 init_logger()
 
 gateways_map = {
-    'rabbitmq': {
+    'AMQP': {
         'agent': RabbitMQAgentGateway,
         'service': RabbitMQServiceGateway,
         'channel': RabbitMQChannelGateway
diff --git a/core/config_parser.py b/core/config_parser.py
index 57cce68f..5fca0981 100644
--- a/core/config_parser.py
+++ b/core/config_parser.py
@@ -40,11 +40,7 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags
 
         connector_func = None
 
-        if conf_record['highload']:
-            gate = gate or prepare_agent_gateway()
-            connector_func = AgentGatewayToServiceConnector(to_service_callback=gate.send_to_service,
-                                                            service_name=name).send
-        elif conf_record['protocol'] == 'http':
+        if conf_record['protocol'] == 'http':
             sess = sess or aiohttp.ClientSession()
             if batch_size == 1 and isinstance(url, str):
                 connector_func = HTTPConnector(sess, url, formatter, name).send
@@ -59,6 +55,11 @@
                     _worker_tasks.append(QueueListenerBatchifyer(sess, u, formatter,
                                                                  name, queue, batch_size))
 
+        elif conf_record['protocol'] == 'AMQP':
+            gate = gate or prepare_agent_gateway()
+            connector_func = AgentGatewayToServiceConnector(to_service_callback=gate.send_to_service,
+                                                            service_name=name).send
+
         if connector_func is None:
             raise ValueError(f'No connector function is defined while making a service {name}.')
 
diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py
index 6940c3d0..db31d2b1 100644
--- a/core/transport/gateways/rabbitmq.py
+++ b/core/transport/gateways/rabbitmq.py
@@ -28,7 +28,7 @@
 
 # TODO: add proper RabbitMQ SSL authentication
-# TODO: add load balancing for stateful skills
+# TODO: add load balancing for stateful skills or remove SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE
 # TODO: add graceful connection close
 class RabbitMQTransportBase:
     _config: 
dict @@ -50,11 +50,11 @@ def __init__(self, config: dict, *args, **kwargs): async def _connect(self) -> None: agent_namespace = self._config['agent_namespace'] - host = self._config['transport']['rabbitmq']['host'] - port = self._config['transport']['rabbitmq']['port'] - login = self._config['transport']['rabbitmq']['login'] - password = self._config['transport']['rabbitmq']['password'] - virtualhost = self._config['transport']['rabbitmq']['virtualhost'] + host = self._config['transport']['AMQP']['host'] + port = self._config['transport']['AMQP']['port'] + login = self._config['transport']['AMQP']['login'] + password = self._config['transport']['AMQP']['password'] + virtualhost = self._config['transport']['AMQP']['virtualhost'] logger.info('Starting RabbitMQ connection...') From 2baaab275b06d28e808de3c5db36ead3df5f46d9 Mon Sep 17 00:00:00 2001 From: litinsky Date: Sat, 5 Oct 2019 18:33:54 +0300 Subject: [PATCH 088/133] Set batch_size default to 1 in rabbitmq config --- config.py | 5 +++++ core/transport/gateways/rabbitmq.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 2f4de8f3..71465820 100644 --- a/config.py +++ b/config.py @@ -32,6 +32,11 @@ AGENT_ENV_FILE = "agent.env" +# TODO: may be we should move default values setting for service config params from code to special config section +# Implicit service default params: +# batch_size = 1 +# infer_url = http://127.0.0.1:5000/model + SKILLS = [ { "name": "odqa", diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index db31d2b1..80307517 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -2,7 +2,7 @@ import json import time from uuid import uuid4 -from typing import Dict, List, Optional, Callable, Awaitable +from typing import Dict, List, Optional, Callable from logging import getLogger import aio_pika @@ -182,7 +182,7 @@ def __init__(self, config: dict, to_service_callback: Callable) -> None: self._loop = asyncio.get_event_loop() self._service_name = self._config['service']['name'] self._instance_id = self._config['service'].get('instance_id', None) or f'{self._service_name}{str(uuid4())}' - self._batch_size = self._config['service']['batch_size'] + self._batch_size = self._config['service'].get('batch_size', 1) self._incoming_messages_buffer = [] self._add_to_buffer_lock = asyncio.Lock() From 098542cc92947723f609fa90ae26c855aa4b7efc Mon Sep 17 00:00:00 2001 From: litinsky Date: Sat, 5 Oct 2019 18:35:51 +0300 Subject: [PATCH 089/133] renamed HIGHLOAD_SETTINGS to transport settings --- config.py | 3 +-- core/config_parser.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/config.py b/config.py index 71465820..1997b0b1 100644 --- a/config.py +++ b/config.py @@ -8,8 +8,7 @@ DB_PATH = getenv('DB_PATH', '/data/db') # TODO: move response timeout to transport settings -# TODO: change naming to transport settings -HIGHLOAD_SETTINGS = { +TRANSPORT_SETTINGS = { 'agent_namespace': 'deeppavlov_agent', 'agent': { 'name': 'dp_agent', diff --git a/core/config_parser.py b/core/config_parser.py index 5fca0981..a1ceb61d 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -5,7 +5,7 @@ import asyncio from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, \ - RESPONSE_SELECTORS, POSTPROCESSORS, HIGHLOAD_SETTINGS + RESPONSE_SELECTORS, POSTPROCESSORS, TRANSPORT_SETTINGS from core.connectors import HTTPConnector, 
ConfidenceResponseSelectorConnector, AioQueueConnector, \ QueueListenerBatchifyer, AgentGatewayToServiceConnector from core.pipeline import Service, simple_workflow_formatter @@ -14,9 +14,9 @@ def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None): - transport_type = HIGHLOAD_SETTINGS['transport']['type'] + transport_type = TRANSPORT_SETTINGS['transport']['type'] gateway_cls = gateways_map[transport_type]['agent'] - return gateway_cls(config=HIGHLOAD_SETTINGS, + return gateway_cls(config=TRANSPORT_SETTINGS, on_service_callback=on_service_callback, on_channel_callback=on_channel_callback) @@ -199,7 +199,7 @@ def get_service_gateway_config(service_name): if not matching_config: raise ValueError(f'Config for service {service_name} was not found') - gateway_config = deepcopy(HIGHLOAD_SETTINGS) + gateway_config = deepcopy(TRANSPORT_SETTINGS) gateway_config['service'] = matching_config return gateway_config From a9e549584cf9d494bb7bb2e7180185c49fef0903 Mon Sep 17 00:00:00 2001 From: litinsky Date: Sat, 5 Oct 2019 18:40:16 +0300 Subject: [PATCH 090/133] Renamed param response_timeout_sec to utterance_lifetime_sec --- config.py | 2 +- core/transport/gateways/rabbitmq.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index 1997b0b1..07a829fe 100644 --- a/config.py +++ b/config.py @@ -12,8 +12,8 @@ 'agent_namespace': 'deeppavlov_agent', 'agent': { 'name': 'dp_agent', - 'response_timeout_sec': 120 }, + 'utterance_lifetime_sec': 120, 'channels': {}, 'transport': { 'type': 'AMQP', diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index 80307517..e122cb40 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -39,13 +39,13 @@ class RabbitMQTransportBase: _agent_in_channel: Channel _agent_out_channel: Channel _in_queue: Optional[Queue] - _response_timeout_sec: int + _utterance_lifetime_sec: int def __init__(self, config: dict, *args, **kwargs): super(RabbitMQTransportBase, self).__init__(*args, **kwargs) self._config = config self._in_queue = None - self._response_timeout_sec = config['agent']['response_timeout_sec'] + self._utterance_lifetime_sec = config['utterance_lifetime_sec'] async def _connect(self) -> None: agent_namespace = self._config['agent_namespace'] @@ -146,7 +146,7 @@ async def send_to_service(self, service_name: str, dialog: dict) -> None: message = Message(body=json.dumps(task.to_json()).encode('utf-8'), delivery_mode=aio_pika.DeliveryMode.PERSISTENT, - expiration=self._response_timeout_sec) + expiration=self._utterance_lifetime_sec) routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=service_name) await self._agent_out_exchange.publish(message=message, routing_key=routing_key) @@ -161,7 +161,7 @@ async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> channel_message_json = channel_message.to_json() message = Message(body=json.dumps(channel_message_json).encode('utf-8'), delivery_mode=aio_pika.DeliveryMode.PERSISTENT, - expiration=self._response_timeout_sec) + expiration=self._utterance_lifetime_sec) routing_key = CHANNEL_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name, channel_id=channel_id) await self._agent_out_exchange.publish(message=message, routing_key=routing_key) @@ -258,7 +258,7 @@ async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: try: responses_batch = await asyncio.wait_for(self._to_service_callback(dialogs_batch), - self._response_timeout_sec) + 
self._utterance_lifetime_sec)
 
             results_replies = []
 
@@ -283,7 +283,7 @@ async def _send_results(self, agent_name: str, task_uuid: str, dialog_id: str, r
 
         message = Message(body=json.dumps(result.to_json()).encode('utf-8'),
                           delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
-                          expiration=self._response_timeout_sec)
+                          expiration=self._utterance_lifetime_sec)
 
         routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=agent_name)
         await self._agent_in_exchange.publish(message=message, routing_key=routing_key)
@@ -336,7 +336,7 @@ async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, res
         message_json = message_from_channel.to_json()
         message = Message(body=json.dumps(message_json).encode('utf-8'),
                           delivery_mode=aio_pika.DeliveryMode.PERSISTENT,
-                          expiration=self._response_timeout_sec)
+                          expiration=self._utterance_lifetime_sec)
 
         routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=self._agent_name)
         await self._agent_in_exchange.publish(message=message, routing_key=routing_key)

From 03477e6f1996233f73051ee7809a013a70a92e5a Mon Sep 17 00:00:00 2001
From: litinsky
Date: Sat, 5 Oct 2019 18:44:13 +0300
Subject: [PATCH 091/133] Changed agent name location in transport config
 structure

---
 config.py                           | 5 +----
 core/transport/gateways/rabbitmq.py | 4 ++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/config.py b/config.py
index 07a829fe..5b7a721f 100644
--- a/config.py
+++ b/config.py
@@ -7,12 +7,9 @@
 DB_PORT = getenv('DB_PORT', 27017)
 DB_PATH = getenv('DB_PATH', '/data/db')
 
-# TODO: move response timeout to transport settings
 TRANSPORT_SETTINGS = {
     'agent_namespace': 'deeppavlov_agent',
-    'agent': {
-        'name': 'dp_agent',
-    },
+    'agent_name': 'dp_agent',
     'utterance_lifetime_sec': 120,
     'channels': {},
     'transport': {
diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py
index e122cb40..1d570c75 100644
--- a/core/transport/gateways/rabbitmq.py
+++ b/core/transport/gateways/rabbitmq.py
@@ -103,7 +103,7 @@ def __init__(self, config: dict,
                                                    on_channel_callback=on_channel_callback)
 
         self._loop = asyncio.get_event_loop()
-        self._agent_name = self._config['agent']['name']
+        self._agent_name = self._config['agent_name']
 
         self._loop.run_until_complete(self._connect())
         self._loop.run_until_complete(self._setup_queues())
@@ -297,7 +297,7 @@ class RabbitMQChannelGateway(RabbitMQTransportBase, ChannelGatewayBase):
     def __init__(self, config: dict, to_channel_callback: Callable) -> None:
         super(RabbitMQChannelGateway, self).__init__(config=config, to_channel_callback=to_channel_callback)
         self._loop = asyncio.get_event_loop()
-        self._agent_name = self._config['agent']['name']
+        self._agent_name = self._config['agent_name']
         self._channel_id = self._config['channel']['id']
 
         self._loop.run_until_complete(self._connect())

From 8580280d72ab61d62a86a739621bbdf1445993c4 Mon Sep 17 00:00:00 2001
From: litinsky
Date: Sat, 5 Oct 2019 18:49:07 +0300
Subject: [PATCH 092/133] Fixed key in connectors_map

---
 core/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/__init__.py b/core/__init__.py
index eba2547f..73bd644b 100644
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -16,5 +16,5 @@
 }
 
 connectors_map = {
-    'http': ServiceGatewayHTTPConnector
-}
\ No newline at end of file
+    'AMQP': ServiceGatewayHTTPConnector
+}

From e435d2f45cdc28bd43535c6f79602b07aebdabd4 Mon Sep 17 00:00:00 2001
From: litinsky
Date: Sat, 5 Oct 2019 19:09:54 +0300
Subject: [PATCH 093/133] Removed redundant TODO

---
 core/transport/gateways/rabbitmq.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py
index 1d570c75..edd670c9 100644
--- a/core/transport/gateways/rabbitmq.py
+++ b/core/transport/gateways/rabbitmq.py
@@ -29,7 +29,6 @@
 
 # TODO: add proper RabbitMQ SSL authentication
 # TODO: add load balancing for stateful skills or remove SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE
-# TODO: add graceful connection close
 class RabbitMQTransportBase:
     _config: dict
     _loop: asyncio.AbstractEventLoop

From 13afb15d6234f034450ff299acef7edc1d728b8c Mon Sep 17 00:00:00 2001
From: litinsky
Date: Sat, 5 Oct 2019 19:41:52 +0300
Subject: [PATCH 094/133] Removed explicit batch_size param from config

---
 config.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/config.py b/config.py
index 5b7a721f..a7824965 100644
--- a/config.py
+++ b/config.py
@@ -36,7 +36,6 @@
 SKILLS = [
     {
         "name": "odqa",
-        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
         "port": 2080,
@@ -50,7 +49,6 @@
     },
     {
         "name": "chitchat",
-        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
         "port": 2081,
@@ -68,7 +66,6 @@
 ANNOTATORS_1 = [
     {
         "name": "ner",
-        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
         "port": 2083,
@@ -85,7 +82,6 @@
 ANNOTATORS_2 = [
     {
         "name": "sentiment",
-        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
         "port": 2084,
@@ -104,7 +100,6 @@
 SKILL_SELECTORS = [
     {
         "name": "chitchat_odqa",
-        "batch_size": 1,
         "protocol": "http",
         "host": "127.0.0.1",
         "port": 2082,

From c8e1d03ccfc3f4791e4ae272c00159ad0eea0e6b Mon Sep 17 00:00:00 2001
From: Olga Gureenkova
Date: Mon, 7 Oct 2019 19:44:03 +0300
Subject: [PATCH 095/133] refactor: make list of hypotheses

---
 core/config_parser.py |  2 +-
 core/connectors.py    | 12 +++---------
 core/state_manager.py | 28 ++++++++++++++------------
 core/state_schema.py  |  8 ++++----
 4 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/core/config_parser.py b/core/config_parser.py
index dde2c09c..5592b5ac 100644
--- a/core/config_parser.py
+++ b/core/config_parser.py
@@ -88,7 +88,7 @@ def add_bot_to_name(name):
 
     if SKILLS:
         for s in SKILLS:
-            service, workers = make_service_from_config_rec(s, session, StateManager.add_selected_skill_dict,
+            service, workers = make_service_from_config_rec(s, session, StateManager.add_hypothesis_dict,
                                                             ['SKILLS'], previous_services)
             services.append(service)
             worker_tasks.extend(workers)
diff --git a/core/connectors.py b/core/connectors.py
index 5eeb0b65..2eb4571c 100644
--- a/core/connectors.py
+++ b/core/connectors.py
@@ -65,18 +65,12 @@ def __init__(self, service_name: str):
         self.service_name = service_name
 
     async def send(self, payload: Dict, callback: Callable):
-        response = payload['utterances'][-1]['selected_skills']
-        best_skill = sorted(response.items(), key=lambda x: x[1]['confidence'], reverse=True)[0]
+        response = payload['utterances'][-1]['hypotheses']
+        best_skill = sorted(response, key=lambda x: x['confidence'], reverse=True)[0]
         response_time = time.time()
         await callback(
             dialog_id=payload['id'],
             service_name=self.service_name,
-            response={
-                'confidence_response_selector': {
-                    'skill_name': best_skill[0],
-                    'text': best_skill[1]['text'],
-                    'confidence': best_skill[1]['confidence']
-                }
-            },
+            response={'confidence_response_selector': best_skill},
             response_time=response_time)
 
diff --git a/core/state_manager.py b/core/state_manager.py
index 7efc691f..3d71fdb6 100644
--- a/core/state_manager.py
+++ b/core/state_manager.py
@@ -42,12 +42,12 @@ def create_new_bot(persona: Optional[List[str]] = None):
 
     @staticmethod
     def 
create_new_human_utterance(text, user: Human, date_time, annotations=None, - selected_skills=None): + hypotheses=None): utt = HumanUtterance(text=text, user=user.to_dict(), date_time=date_time, annotations=annotations or HumanUtterance.annotations.default, - selected_skills=selected_skills or HumanUtterance.selected_skills.default) + hypotheses=hypotheses or HumanUtterance.hypotheses.default) utt.save() return utt @@ -93,9 +93,9 @@ def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): @classmethod def add_human_utterance(cls, dialog: Dialog, user: Human, text: str, date_time: datetime, annotation: Optional[dict] = None, - selected_skill: Optional[dict] = None) -> None: + hypothesis: Optional[dict] = None) -> None: utterance = cls.create_new_human_utterance(text, user, date_time, annotation, - selected_skill) + hypothesis) dialog.utterances.append(utterance) dialog.save() @@ -122,10 +122,11 @@ def add_annotation(dialog: Dialog, payload: Dict): dialog.utterances[-1].save() @staticmethod - def add_selected_skill(dialog: Dialog, payload: Dict): - if not dialog.utterances[-1].selected_skills: - dialog.utterances[-1].selected_skills = {} - dialog.utterances[-1].selected_skills.update(payload) + def add_hypothesis(dialog: Dialog, payload: Dict): + # TODO remove the next 2 lines? + if not dialog.utterances[-1].hypotheses: + dialog.utterances[-1].hypotheses = [] + dialog.utterances[-1].hypotheses.append(payload) dialog.utterances[-1].save() @staticmethod @@ -137,7 +138,7 @@ def add_text(dialog: Dialog, payload: str): def add_bot_response(cls, dialog: Dialog, payload: Dict): active_skill_name = list(payload.values())[0] human_utterance = dialog.utterances[-1] - active_skill = human_utterance.selected_skills.get(active_skill_name, None) + active_skill = [h for h in human_utterance.hypotheses if h['skill_name'] == active_skill_name][0] # take first if not active_skill: raise ValueError(f'provided {payload} is not valid') @@ -216,7 +217,8 @@ def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payl active_skill_name = rselector_data['skill_name'] new_text = rselector_data['text'] new_confidence = rselector_data['confidence'] - active_skill = dialog['utterances'][-1]['selected_skills'].get(active_skill_name, None) + hypotheses = dialog['utterances'][-1]['hypotheses'] + active_skill = [h for h in hypotheses if h['skill_name'] == active_skill_name][0] # take first if not active_skill: raise ValueError(f'provided {payload} is not valid') cls.update_human_dict(dialog['human'], active_skill) @@ -236,8 +238,10 @@ def add_annotation_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kw dialog['utterances'][-1]['annotations'].update(payload) @staticmethod - def add_selected_skill_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs): - dialog['utterances'][-1]['selected_skills'].update(payload) + def add_hypothesis_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs): + hypothesis = {'skill_name': list(payload.keys())[0]} + hypothesis = {**hypothesis, **list(payload.values())[0]} + dialog['utterances'][-1]['hypotheses'].append(hypothesis) @staticmethod def add_text_dict(dialog: Dict, payload: str): diff --git a/core/state_schema.py b/core/state_schema.py index 9c185d72..c9bc43fa 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -9,7 +9,7 @@ 'user': {}, 'annotations': {}, 'date_time': None, - 'selected_skills': {}, + 'hypotheses': [], } BOT_UTTERANCE_SCHEMA = { @@ -131,7 +131,7 @@ def make_from_dict(self, *args, 
**kwargs): class HumanUtterance(Utterance): - selected_skills = DynamicField(default=[]) + hypotheses = ListField(default=[]) def to_dict(self): return { @@ -140,7 +140,7 @@ def to_dict(self): 'user': self.user, 'annotations': self.annotations, 'date_time': str(self.date_time), - 'selected_skills': self.selected_skills + 'hypotheses': self.hypotheses } @classmethod @@ -150,7 +150,7 @@ def make_from_dict(cls, payload): utterance.text = payload['text'] utterance.annotations = payload['annotations'] utterance.date_time = payload['date_time'] - utterance.selected_skills = payload['selected_skills'] + utterance.hypotheses = payload['hypotheses'] utterance.user = payload['user'] utterance.save() return utterance From 9a5b90d2d90b519a335060ef139c379d78a9d57f Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 7 Oct 2019 20:32:31 +0300 Subject: [PATCH 096/133] refactor: skills return a list --- core/state_manager.py | 34 ++++++++++--------------------- state_formatters/dp_formatters.py | 11 +++++----- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/core/state_manager.py b/core/state_manager.py index 3d71fdb6..9cfe4d8b 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -123,10 +123,9 @@ def add_annotation(dialog: Dialog, payload: Dict): @staticmethod def add_hypothesis(dialog: Dialog, payload: Dict): - # TODO remove the next 2 lines? - if not dialog.utterances[-1].hypotheses: - dialog.utterances[-1].hypotheses = [] - dialog.utterances[-1].hypotheses.append(payload) + hypothesis = {'skill_name': list(payload.keys())[0]} + for h in list(payload.values())[0]: + dialog.utterances[-1].hypotheses.append({**hypothesis, **h}) dialog.utterances[-1].save() @staticmethod @@ -135,16 +134,12 @@ def add_text(dialog: Dialog, payload: str): dialog.utterances[-1].save() @classmethod - def add_bot_response(cls, dialog: Dialog, payload: Dict): - active_skill_name = list(payload.values())[0] + def add_bot_response(cls, dialog: Dialog, payload: Dict, **kwargs): + active_skill = list(payload.values())[0] human_utterance = dialog.utterances[-1] - active_skill = [h for h in human_utterance.hypotheses if h['skill_name'] == active_skill_name][0] # take first - if not active_skill: - raise ValueError(f'provided {payload} is not valid') - + active_skill_name = active_skill['skill_name'] text = active_skill['text'] confidence = active_skill['confidence'] - cls.add_bot_utterance(dialog, text, datetime.now(), active_skill_name, confidence) cls.update_human(human_utterance.user, active_skill) cls.update_bot(dialog.utterances[-1].user, active_skill) @@ -214,21 +209,14 @@ def update_bot_dict(bot: Dict, active_skill: Dict): def add_bot_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs) -> None: rselector_data = list(payload.values())[0] - active_skill_name = rselector_data['skill_name'] new_text = rselector_data['text'] new_confidence = rselector_data['confidence'] - hypotheses = dialog['utterances'][-1]['hypotheses'] - active_skill = [h for h in hypotheses if h['skill_name'] == active_skill_name][0] # take first - if not active_skill: - raise ValueError(f'provided {payload} is not valid') - cls.update_human_dict(dialog['human'], active_skill) - cls.update_bot_dict(dialog['bot'], active_skill) - + cls.update_human_dict(dialog['human'], rselector_data) + cls.update_bot_dict(dialog['bot'], rselector_data) utterance = deepcopy(BOT_UTTERANCE_SCHEMA) utterance['text'] = new_text - utterance['orig_text'] = active_skill['text'] utterance['date_time'] = 
str(datetime.now())
-        utterance['active_skill'] = active_skill_name
+        utterance['active_skill'] = rselector_data['skill_name']
         utterance['confidence'] = new_confidence
         utterance['user'] = dialog['bot']
         dialog['utterances'].append(utterance)
@@ -240,8 +228,8 @@ def add_annotation_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kw
     @staticmethod
     def add_hypothesis_dict(dialog: Dict, dialog_object: Dialog, payload: Dict, **kwargs):
         hypothesis = {'skill_name': list(payload.keys())[0]}
-        hypothesis = {**hypothesis, **list(payload.values())[0]}
-        dialog['utterances'][-1]['hypotheses'].append(hypothesis)
+        for h in list(payload.values())[0]:
+            dialog['utterances'][-1]['hypotheses'].append({**hypothesis, **h})
 
     @staticmethod
     def add_text_dict(dialog: Dict, payload: str):
diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py
index 90e7ff53..e0cd0330 100644
--- a/state_formatters/dp_formatters.py
+++ b/state_formatters/dp_formatters.py
@@ -60,8 +60,7 @@ def base_skill_output_formatter(payload):
     Returns:
         a formatted batch instance
     """
-    return {"text": payload[0],
-            "confidence": payload[1]}
+    return payload
 
 
 def base_annotator_formatter(payload: Any, model_args_names=('x',), mode='in'):
@@ -102,16 +101,16 @@ def odqa_formatter(payload: Any, model_args_names=('question_raw',), mode='in'):
     if mode == 'in':
         return last_utterances(payload, model_args_names)
     elif mode == 'out':
-        return {"text": payload[0],
-                "confidence": 0.5}
+        return [{"text": payload[0],
+                 "confidence": 0.5}]
 
 
 def chitchat_formatter(payload: Any, model_args_names=('q',), mode='in'):
     if mode == 'in':
         return last_utterances(payload, model_args_names)
     elif mode == 'out':
-        return {"text": payload[0],
-                "confidence": 0.5}
+        return [{"text": payload[0],
+                 "confidence": 0.5}]
 
 
 def chitchat_example_formatter(payload: Any,

From 84f3d3e9884309b0ade62a79e7413034f7ebcd69 Mon Sep 17 00:00:00 2001
From: Olga Gureenkova
Date: Tue, 8 Oct 2019 13:39:06 +0300
Subject: [PATCH 097/133] docs: update skill api

---
 docs/source/api/services_http_api.rst | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/docs/source/api/services_http_api.rst b/docs/source/api/services_http_api.rst
index 2d2c11e2..f191ecb5 100644
--- a/docs/source/api/services_http_api.rst
+++ b/docs/source/api/services_http_api.rst
@@ -60,25 +60,27 @@ For example:
 Skill
 =====
 
-Skill should return a dict with required ``text`` and ``confidence`` keys. If a skill wants to
-update either **Human** or **Bot** profile, it should pack these attributes into ``human_attributes`` and
-``bot_attributes`` keys. All attributes in ``human_attributes`` and ``bot_attributes`` will overwrite
-current **Human** and **Bot** attribute values accordingly. And if there are no such attributes, they will be stored under
-``attributes`` key inside **Human** or **Bot**.
+Skill should return a **list of dicts** where each dict is a single hypothesis. Each dict requires
+``text`` and ``confidence`` keys. If a skill wants to update either **Human** or **Bot** profile,
+it should pack these attributes into ``human_attributes`` and ``bot_attributes`` keys.
+
+All attributes in ``human_attributes`` and ``bot_attributes`` will overwrite current **Human** and **Bot**
+attribute values accordingly. And if there are no such attributes, they will be stored under ``attributes``
+key inside **Human** or **Bot**.
 
 The minimum required response of a skill is a 2-key dictionary:
 
 .. code:: json
 
-    {"text": "hello", "confidence": 0.33}
+    [{"text": "hello", "confidence": 0.33}]
 
 
 But it's possible to extend it with ``human_attributes`` and ``bot_attributes`` keys:
 
 .. code:: json
 
-    {"text": "hello", "confidence": 0.33, "human_attributes": {"name": "Vasily"},
-     "bot_attributes": {"persona": ["I like swimming.", "I have a nice swimming suit."]}}
+    [{"text": "hello", "confidence": 0.33, "human_attributes": {"name": "Vasily"},
+     "bot_attributes": {"persona": ["I like swimming.", "I have a nice swimming suit."]}}]
 
 Everything sent to ``human_attributes`` and ``bot_attributes`` keys will update `user` field in the same
 utterance for the human and in the next utterance for the bot. Please refer to user_state_api_ to find more
 
 Also it's possible for a skill to send any additional key to the state:
 
 .. code:: json
 
-    {"text": "hello", "confidence": 0.33, "any_key": "any_value"}
+    [{"text": "hello", "confidence": 0.33, "any_key": "any_value"}]
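For illustration, a minimal skill that serves hypotheses in the format documented above could look like
the sketch below. The request payload (``dialogs``), the endpoint name, and the port are assumptions made
for the example rather than the agent's fixed contract (the real payload depends on the formatter configured
for the skill); only the response shape follows the documented format.

.. code:: python

    from aiohttp import web

    async def respond(request):
        payload = await request.json()
        dialogs = payload.get('dialogs', [])
        # one list of hypotheses per dialog; every hypothesis
        # must carry at least 'text' and 'confidence'
        hypotheses_batch = [[{'text': 'hello', 'confidence': 0.33}] for _ in dialogs]
        return web.json_response(hypotheses_batch)

    app = web.Application()
    app.add_routes([web.post('/respond', respond)])

    if __name__ == '__main__':
        web.run_app(app, port=2080)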
 
 
 Response Selector
 =================
@@ -102,6 +104,12 @@ overwritten from the original skill response) and confidence (also may be overwr
 
     {"skill_name": "chitchat", "text": "Hello, Joe!", "confidence": 0.3}
 
+Also it's possible for a Response Selector to overwrite any ``human`` or ``bot`` attributes:
+
+ .. code:: json
+
+    {"skill_name": "chitchat", "text": "Hello, Joe!", "confidence": 0.3, "human_attributes": {"name": "Ivan"}}
+
 
 Postprocessor
 =============

From d9a1dc9f90eecfc31fd54f59b4fba3a4d45aa8aa Mon Sep 17 00:00:00 2001
From: litinsky
Date: Tue, 8 Oct 2019 17:09:33 +0300
Subject: [PATCH 098/133] Moved transport settings to core/transport/transport_settings.py

---
 config.py                            | 21 ---------------------
 core/config_parser.py                |  9 +++++----
 core/transport/transport_settings.py | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+), 25 deletions(-)
 create mode 100644 core/transport/transport_settings.py

diff --git a/config.py b/config.py
index 94d34476..fd4e9770 100644
--- a/config.py
+++ b/config.py
@@ -7,27 +7,6 @@
 DB_PORT = getenv('DB_PORT', 27017)
 DB_PATH = getenv('DB_PATH', '/data/db')
 
-# TODO: move response timeout to transport settings
-# TODO: change naming to transport settings
-HIGHLOAD_SETTINGS = {
-    'agent_namespace': 'deeppavlov_agent',
-    'agent': {
-        'name': 'dp_agent',
-        'response_timeout_sec': 120
-    },
-    'channels': {},
-    'transport': {
-        'type': 'rabbitmq',
-        'rabbitmq': {
-            'host': '127.0.0.1',
-            'port': 5672,
-            'login': 'guest',
-            'password': 'guest',
-            'virtualhost': '/'
-        }
-    }
-}
-
 MAX_WORKERS = 4
 
 AGENT_ENV_FILE = "agent.env"
diff --git a/core/config_parser.py b/core/config_parser.py
index 57cce68f..613ff2f0 100644
--- a/core/config_parser.py
+++ b/core/config_parser.py
@@ -5,7 +5,8 @@
 import asyncio
 
 from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, \
-    RESPONSE_SELECTORS, POSTPROCESSORS, HIGHLOAD_SETTINGS
+    RESPONSE_SELECTORS, POSTPROCESSORS
+from core.transport.transport_settings import TRANSPORT_SETTINGS
 from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \
     QueueListenerBatchifyer, AgentGatewayToServiceConnector
 from core.pipeline import Service, simple_workflow_formatter
@@ -14,9 +15,9 @@


 def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None):
-    transport_type = HIGHLOAD_SETTINGS['transport']['type']
+    transport_type = TRANSPORT_SETTINGS['transport']['type']
     gateway_cls = gateways_map[transport_type]['agent']
-    return gateway_cls(config=HIGHLOAD_SETTINGS,
+    return gateway_cls(config=TRANSPORT_SETTINGS, 
on_service_callback=on_service_callback, on_channel_callback=on_channel_callback) @@ -198,7 +199,7 @@ def get_service_gateway_config(service_name): if not matching_config: raise ValueError(f'Config for service {service_name} was not found') - gateway_config = deepcopy(HIGHLOAD_SETTINGS) + gateway_config = deepcopy(TRANSPORT_SETTINGS) gateway_config['service'] = matching_config return gateway_config diff --git a/core/transport/transport_settings.py b/core/transport/transport_settings.py new file mode 100644 index 00000000..f35caed0 --- /dev/null +++ b/core/transport/transport_settings.py @@ -0,0 +1,19 @@ +# TODO: move response timeout to transport settings +TRANSPORT_SETTINGS = { + 'agent_namespace': 'deeppavlov_agent', + 'agent': { + 'name': 'dp_agent', + 'response_timeout_sec': 120 + }, + 'channels': {}, + 'transport': { + 'type': 'rabbitmq', + 'rabbitmq': { + 'host': '127.0.0.1', + 'port': 5672, + 'login': 'guest', + 'password': 'guest', + 'virtualhost': '/' + } + } +} \ No newline at end of file From 4c6c756a5324602f6f92c4b55424f2816ac7ca99 Mon Sep 17 00:00:00 2001 From: litinsky Date: Tue, 8 Oct 2019 17:26:17 +0300 Subject: [PATCH 099/133] Refactored loop break to for .. else --- core/config_parser.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 474e8723..fe769919 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -187,8 +187,6 @@ def add_bot_to_name(name): def get_service_gateway_config(service_name): - matching_config = None - for config in chain(SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, RESPONSE_SELECTORS, POSTPROCESSORS): config_name = config['name'] @@ -196,8 +194,7 @@ def get_service_gateway_config(service_name): if config_name == service_name: matching_config = config break - - if not matching_config: + else: raise ValueError(f'Config for service {service_name} was not found') gateway_config = deepcopy(TRANSPORT_SETTINGS) From 1a35fb1546bc4a3b73094f4c0c28d3b7a30509c5 Mon Sep 17 00:00:00 2001 From: litinsky Date: Tue, 8 Oct 2019 17:31:31 +0300 Subject: [PATCH 100/133] Service url in ServiceGatewayHTTPConnector is taken from service config url param now --- core/connectors.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/connectors.py b/core/connectors.py index 08e5c3df..25ca1fd5 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -137,9 +137,7 @@ def __init__(self, service_config: dict, formatter: Callable) -> None: super(ServiceGatewayHTTPConnector, self).__init__(service_config, formatter) self._session = aiohttp.ClientSession() self._service_name = service_config['name'] - - default_infer_url = 'http://127.0.0.1:5000/model' - self._url = service_config.get('infer_url', default_infer_url) + self._url = service_config['url'] async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: async with await self._session.post(self._url, json=self._formatter(dialogs)) as resp: From 680cd27869f4cffed18774564dec6b2e97e12661 Mon Sep 17 00:00:00 2001 From: litinsky Date: Tue, 8 Oct 2019 17:33:44 +0300 Subject: [PATCH 101/133] Refactored super().__init__ in ServiceGatewayHTTPConnector --- core/connectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/connectors.py b/core/connectors.py index 25ca1fd5..711ddb10 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -134,7 +134,7 @@ class ServiceGatewayHTTPConnector(ServiceGatewayConnectorBase): _service_name: str def __init__(self, service_config: dict, 
formatter: Callable) -> None: - super(ServiceGatewayHTTPConnector, self).__init__(service_config, formatter) + super().__init__(service_config, formatter) self._session = aiohttp.ClientSession() self._service_name = service_config['name'] self._url = service_config['url'] From d6d64ec1ba35deb6b04be89227b2cd40eb1b7cbc Mon Sep 17 00:00:00 2001 From: litinsky Date: Tue, 8 Oct 2019 17:35:58 +0300 Subject: [PATCH 102/133] Moved all typevar bindings to the file bottom --- core/transport/base.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/core/transport/base.py b/core/transport/base.py index cfa2ddf9..444fc81a 100644 --- a/core/transport/base.py +++ b/core/transport/base.py @@ -35,9 +35,6 @@ async def send_to_channel(self, channel_id: str, user_id: str, response: str) -> raise NotImplementedError -TAgentGateway = TypeVar('TAgentGateway', bound=AgentGatewayBase) - - class ServiceGatewayConnectorBase: _service_config: dict _formatter: Callable @@ -50,9 +47,6 @@ async def send_to_service(self, dialogs: List[Dict]) -> List[Any]: raise NotImplementedError -TServiceGatewayConnectorBase = TypeVar('TServiceGatewayConnectorBase', bound=ServiceGatewayConnectorBase) - - class ServiceGatewayBase: _to_service_callback: Callable @@ -61,9 +55,6 @@ def __init__(self, to_service_callback: Callable, *args, **kwargs) -> None: self._to_service_callback = to_service_callback -TServiceGateway = TypeVar('TServiceGateway', bound=ServiceGatewayBase) - - class ChannelGatewayConnectorBase: _config: dict _channel_id: str @@ -78,9 +69,6 @@ async def send_to_channel(self, user_id: str, response: str) -> None: raise NotImplementedError -TChannelGatewayConnectorBase = TypeVar('TChannelGatewayConnectorBase', bound=ChannelGatewayConnectorBase) - - class ChannelGatewayBase: _to_channel_callback: Callable @@ -92,4 +80,8 @@ async def send_to_agent(self, utterance: str, channel_id: str, user_id: str, res raise NotImplementedError +TAgentGateway = TypeVar('TAgentGateway', bound=AgentGatewayBase) +TServiceGatewayConnectorBase = TypeVar('TServiceGatewayConnectorBase', bound=ServiceGatewayConnectorBase) +TServiceGateway = TypeVar('TServiceGateway', bound=ServiceGatewayBase) +TChannelGatewayConnectorBase = TypeVar('TChannelGatewayConnectorBase', bound=ChannelGatewayConnectorBase) TChannelGateway = TypeVar('TChannelGateway', bound=ChannelGatewayBase) From edb41a5ee83965f9b7034c3c68cd990fd4ac88bc Mon Sep 17 00:00:00 2001 From: litinsky Date: Tue, 8 Oct 2019 17:38:11 +0300 Subject: [PATCH 103/133] changed dict -> Dict --- core/transport/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/transport/base.py b/core/transport/base.py index 444fc81a..a098797f 100644 --- a/core/transport/base.py +++ b/core/transport/base.py @@ -39,7 +39,7 @@ class ServiceGatewayConnectorBase: _service_config: dict _formatter: Callable - def __init__(self, service_config: dict, formatter: Callable) -> None: + def __init__(self, service_config: Dict, formatter: Callable) -> None: self._service_config = service_config self._formatter = formatter From d7fe1557b0b8f90077accd6f894f2055d0578223 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 8 Oct 2019 18:01:35 +0300 Subject: [PATCH 104/133] hotfix: agent batch test --- utils/agent_batch_test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index f11fb458..b8d0cc82 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -22,9 +22,12 @@ def 
init_agent(): services, workers, session = parse_old_config() + endpoint = Service('cmd_responder', EventSetOutputConnector('cmd_responder').send, + StateManager.save_dialog_dict, 1, ['responder']) + input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) pipeline = Pipeline(services) - endpoint = Service('http_responder', EventSetOutputConnector(), None, 1, ['responder']) pipeline.add_responder_service(endpoint) + pipeline.add_input_service(input_srv) agent = Agent(pipeline, StateManager()) return agent, session @@ -43,12 +46,13 @@ async def main(): tasks = [] for u, u_d_type, dt, loc, ch_t in zip(phrases, u_d_types, date_times, locations, ch_types): u_tg_id = uuid.uuid4().hex - tasks.append(agent.register_msg(u, u_tg_id, u_d_type, dt, loc, ch_t, None, True)) + tasks.append(agent.register_msg(utterance=u, user_telegram_id=u_tg_id, user_device_type=u_d_type, + location=loc, channel_type=ch_t, require_response=True)) res = await asyncio.gather(*tasks, return_exceptions=False) await session.close() - return [i['dialog'].utterances[-1].text for i in res] + return [i['dialog']['utterances'][-1]['text'] for i in res] if __name__ == "__main__": From bc662b1963dd1ead9998e0e87f2d04eb3c852bc4 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 13:41:50 +0300 Subject: [PATCH 105/133] Updated flake8 config --- .flake8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index 43ca0dd7..caf82a27 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length=120 ignore=D100,D101,D102,D103,D107,F403,F405 -exclude=.git,__pycache__,build,dist +exclude=.git,__pycache__,build,dist,env From e27300b89c5d8f0510b54a8514102c0080783a80 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:02:47 +0300 Subject: [PATCH 106/133] Added graceful RabbitMQ disconnect --- core/run.py | 18 +++++++++++++++--- core/transport/gateways/rabbitmq.py | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/core/run.py b/core/run.py index 536b0d9d..caad22ab 100644 --- a/core/run.py +++ b/core/run.py @@ -2,7 +2,6 @@ import argparse import uuid import logging -from typing import Any, Hashable from aiohttp import web, ClientSession from datetime import datetime @@ -194,6 +193,8 @@ def run_default(): future.cancel() if session: loop.run_until_complete(session.close()) + if gateway: + gateway.disconnect() loop.stop() loop.close() logging.shutdown() @@ -254,10 +255,21 @@ def run_service(): transport_type = gateway_config['transport']['type'] gateway_cls = gateways_map[transport_type]['service'] - _gateway = gateway_cls(config=gateway_config, to_service_callback=connector.send_to_service) + gateway = gateway_cls(config=gateway_config, to_service_callback=connector.send_to_service) loop = asyncio.get_event_loop() - loop.run_forever() + + try: + loop.run_forever() + except KeyboardInterrupt: + pass + except Exception as e: + raise e + finally: + gateway.disconnect() + loop.stop() + loop.close() + logging.shutdown() def run_channel(): diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index edd670c9..7f2a9c56 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -81,6 +81,9 @@ async def _connect(self) -> None: type=aio_pika.ExchangeType.TOPIC) logger.info(f'Declared agent out exchange: {agent_out_exchange_name}') + def disconnect(self): + self._connection.close() + async def _setup_queues(self) -> None: raise NotImplementedError From 
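Pieced together from this patch and the earlier gateway and connector changes, the service-side entry
point ends up wired roughly as in the sketch below. The connector construction and the origin of the
``formatter`` argument are assumptions made for illustration; the gateway lookup, the graceful shutdown,
and ``get_service_gateway_config`` mirror the diffs in this series.

.. code:: python

    import asyncio
    import logging

    from core import connectors_map, gateways_map
    from core.config_parser import get_service_gateway_config


    def run_service_sketch(service_name, formatter):
        gateway_config = get_service_gateway_config(service_name)
        transport_type = gateway_config['transport']['type']     # 'AMQP' by default

        # assumed: connectors are keyed by transport type, like the gateways
        connector_cls = connectors_map[transport_type]            # ServiceGatewayHTTPConnector
        connector = connector_cls(gateway_config['service'], formatter)

        gateway_cls = gateways_map[transport_type]['service']     # RabbitMQServiceGateway
        gateway = gateway_cls(config=gateway_config,
                              to_service_callback=connector.send_to_service)

        loop = asyncio.get_event_loop()
        try:
            loop.run_forever()
        except KeyboardInterrupt:
            pass
        finally:
            gateway.disconnect()
            loop.stop()
            loop.close()
            logging.shutdown()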
696ce25fb2bb9c7ca81081b6e3cbdfdeb9e0a8a2 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:04:17 +0300 Subject: [PATCH 107/133] Small style fixes --- core/config_parser.py | 9 +++++---- core/log.py | 2 +- core/pipeline.py | 2 +- core/transport/transport_settings.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index fe769919..838ab4dd 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -134,10 +134,11 @@ def add_bot_to_name(name): ) else: for r in RESPONSE_SELECTORS: - service, workers, session, gateway = make_service_from_config_rec(r, session, - StateManager.add_bot_utterance_simple_dict, - ['RESPONSE_SELECTORS'], previous_services, - gateway) + service, workers, session, gateway = \ + make_service_from_config_rec(r, session, + StateManager.add_bot_utterance_simple_dict, + ['RESPONSE_SELECTORS'], previous_services, + gateway) services.append(service) worker_tasks.extend(workers) diff --git a/core/log.py b/core/log.py index 575ef238..f7f0db6a 100644 --- a/core/log.py +++ b/core/log.py @@ -32,4 +32,4 @@ def init_logger(): handler['filename'] = str(logfile_path) - logging.config.dictConfig(log_config) \ No newline at end of file + logging.config.dictConfig(log_config) diff --git a/core/pipeline.py b/core/pipeline.py index 44015850..b9e050c9 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -61,7 +61,7 @@ def process_service_names(self): self.services[name_prev_service].next_services.add(service) return wrong_names # wrong names means that some service_names, used in previous services don't exist - def get_next_services(self, done: set = None, waiting: set =None): + def get_next_services(self, done: set = None, waiting: set = None): if done is None: done = set() if waiting is None: diff --git a/core/transport/transport_settings.py b/core/transport/transport_settings.py index f35caed0..e4bc4fe7 100644 --- a/core/transport/transport_settings.py +++ b/core/transport/transport_settings.py @@ -16,4 +16,4 @@ 'virtualhost': '/' } } -} \ No newline at end of file +} From 68ccfcc1a8bc7ad43cb8642dfa9dccd2e19f20a2 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:27:22 +0300 Subject: [PATCH 108/133] Removed some comments from config.py --- config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config.py b/config.py index f7528019..3f29d390 100644 --- a/config.py +++ b/config.py @@ -11,11 +11,6 @@ AGENT_ENV_FILE = "agent.env" -# TODO: may be we should move default values setting for service config params from code to special config section -# Implicit service default params: -# batch_size = 1 -# infer_url = http://127.0.0.1:5000/model - SKILLS = [ { "name": "odqa", From 826e9334cdc9b94fd9902d0f7ecb095eebaa53b0 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:35:03 +0300 Subject: [PATCH 109/133] Changed transport_settings structure --- core/transport/transport_settings.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/transport/transport_settings.py b/core/transport/transport_settings.py index e4bc4fe7..a49da508 100644 --- a/core/transport/transport_settings.py +++ b/core/transport/transport_settings.py @@ -1,10 +1,7 @@ -# TODO: move response timeout to transport settings TRANSPORT_SETTINGS = { 'agent_namespace': 'deeppavlov_agent', - 'agent': { - 'name': 'dp_agent', - 'response_timeout_sec': 120 - }, + 'agent_name': 'dp_agent', + 'response_timeout_sec': 120, 'channels': {}, 'transport': { 'type': 'rabbitmq', From 
594dc1dd48259d43d2a1693c5f618997b292282c Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:41:19 +0300 Subject: [PATCH 110/133] Fixed transport type in TRANSPORT_SETTINGS --- core/transport/transport_settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/transport/transport_settings.py b/core/transport/transport_settings.py index a49da508..318706a5 100644 --- a/core/transport/transport_settings.py +++ b/core/transport/transport_settings.py @@ -4,8 +4,8 @@ 'response_timeout_sec': 120, 'channels': {}, 'transport': { - 'type': 'rabbitmq', - 'rabbitmq': { + 'type': 'AMQP', + 'AMQP': { 'host': '127.0.0.1', 'port': 5672, 'login': 'guest', From eecf5508b9eb31d882c1b3f9c724686d8fbe9271 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:44:01 +0300 Subject: [PATCH 111/133] Fixed proper utterance_lifetime_sec param name --- core/transport/transport_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/transport/transport_settings.py b/core/transport/transport_settings.py index 318706a5..276ec75c 100644 --- a/core/transport/transport_settings.py +++ b/core/transport/transport_settings.py @@ -1,7 +1,7 @@ TRANSPORT_SETTINGS = { 'agent_namespace': 'deeppavlov_agent', 'agent_name': 'dp_agent', - 'response_timeout_sec': 120, + 'utterance_lifetime_sec': 120, 'channels': {}, 'transport': { 'type': 'AMQP', From 1f031f37b46146716e77a689fb6700099cb9d29f Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 14:55:56 +0300 Subject: [PATCH 112/133] Fixed url generation --- core/transform_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/transform_config.py b/core/transform_config.py index a84430cc..8f171425 100644 --- a/core/transform_config.py +++ b/core/transform_config.py @@ -13,8 +13,9 @@ # generate component url for service in chain(*ANNOTATORS, SKILL_SELECTORS, SKILLS, RESPONSE_SELECTORS, POSTPROCESSORS): - host = service['name'] if getenv('DPA_LAUNCHING_ENV') == 'docker' else service['host'] - service['url'] = f"{service['protocol']}://{host}:{service['port']}/{service['endpoint']}" + if 'url' not in service: + host = service['name'] if getenv('DPA_LAUNCHING_ENV') == 'docker' else service['host'] + service['url'] = f"{service['protocol']}://{host}:{service['port']}/{service['endpoint']}" DB_HOST = 'mongo' if getenv('DPA_LAUNCHING_ENV') == 'docker' else DB_HOST From 6a638e5d4eabc599be1a27ff51c58551ed7efc0d Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Wed, 9 Oct 2019 17:11:24 +0300 Subject: [PATCH 113/133] fix test setup --- tests/dummy_connectors_test_setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dummy_connectors_test_setup.py b/tests/dummy_connectors_test_setup.py index 53516e4f..ac16b2c7 100644 --- a/tests/dummy_connectors_test_setup.py +++ b/tests/dummy_connectors_test_setup.py @@ -7,7 +7,7 @@ from datetime import datetime from random import choice -from core.pipeline import Service +from core.service import Service from core.connectors import HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager @@ -30,7 +30,7 @@ async def send(self, payload, callback): await callback( dialog_id=payload['id'], service_name=self.service_name, - response={self.service_name: {"text": choice(self.returns), "confidence": 0.5}}, + response={self.service_name: [{"text": choice(self.returns), "confidence": 0.5}]}, response_time=time.time()) From 11663ae5fcc348d9dcb70aa1428dbd7e1f4ac28e Mon Sep 17 
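After the renames in patches 109-111 (flattened ``agent_name``, the ``AMQP`` transport type, and
``utterance_lifetime_sec``), the transport settings end up with the shape below. This is reconstructed
from the diffs in this series rather than copied from the final file.

.. code:: python

    # core/transport/transport_settings.py as it looks after patches 109-111 (reconstructed)
    TRANSPORT_SETTINGS = {
        'agent_namespace': 'deeppavlov_agent',
        'agent_name': 'dp_agent',
        'utterance_lifetime_sec': 120,
        'channels': {},
        'transport': {
            'type': 'AMQP',
            'AMQP': {
                'host': '127.0.0.1',
                'port': 5672,
                'login': 'guest',
                'password': 'guest',
                'virtualhost': '/'
            }
        }
    }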
00:00:00 2001 From: Pavel Pugin Date: Wed, 9 Oct 2019 17:11:50 +0300 Subject: [PATCH 114/133] split service and pipeline to different modules --- core/config_parser.py | 3 ++- core/pipeline.py | 30 +----------------------------- core/run.py | 3 ++- core/service.py | 27 +++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 31 deletions(-) create mode 100644 core/service.py diff --git a/core/config_parser.py b/core/config_parser.py index 5592b5ac..0cefb70f 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -5,7 +5,8 @@ RESPONSE_SELECTORS, POSTPROCESSORS from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \ QueueListenerBatchifyer -from core.pipeline import Service, simple_workflow_formatter +from core.pipeline import simple_workflow_formatter +from core.service import Service from core.state_manager import StateManager diff --git a/core/pipeline.py b/core/pipeline.py index 44015850..87cc3442 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -1,33 +1,5 @@ from collections import defaultdict, Counter - - -class Service: - def __init__(self, name, connector_func, state_processor_method=None, - batch_size=1, tags=None, names_previous_services=None, - workflow_formatter=None): - self.name = name - self.batch_size = batch_size - self.state_processor_method = state_processor_method - self.names_previous_services = names_previous_services or set() - self.tags = tags or [] - self.workflow_formatter = workflow_formatter - self.connector_func = connector_func - self.previous_services = set() - self.next_services = set() - - def is_sselector(self): - return 'selector' in self.tags - - def is_responder(self): - return 'responder' in self.tags - - def is_input(self): - return 'input' in self.tags - - def apply_workflow_formatter(self, workflow_record): - if not self.workflow_formatter: - return workflow_record - return self.workflow_formatter(workflow_record) +from core.service import Service class Pipeline: diff --git a/core/run.py b/core/run.py index 49ccded8..c9966037 100644 --- a/core/run.py +++ b/core/run.py @@ -9,7 +9,8 @@ from os import getenv from core.agent import Agent -from core.pipeline import Pipeline, Service +from core.pipeline import Pipeline +from core.service import Service from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager diff --git a/core/service.py b/core/service.py new file mode 100644 index 00000000..1d34d5b0 --- /dev/null +++ b/core/service.py @@ -0,0 +1,27 @@ +class Service: + def __init__(self, name, connector_func, state_processor_method=None, + batch_size=1, tags=None, names_previous_services=None, + workflow_formatter=None): + self.name = name + self.batch_size = batch_size + self.state_processor_method = state_processor_method + self.names_previous_services = names_previous_services or set() + self.tags = tags or [] + self.workflow_formatter = workflow_formatter + self.connector_func = connector_func + self.previous_services = set() + self.next_services = set() + + def is_sselector(self): + return 'selector' in self.tags + + def is_responder(self): + return 'responder' in self.tags + + def is_input(self): + return 'input' in self.tags + + def apply_workflow_formatter(self, workflow_record): + if not self.workflow_formatter: + return workflow_record + return self.workflow_formatter(workflow_record) From 28878b1930dd1cee2090a478f01f3b5c55f3fa11 Mon Sep 17 00:00:00 2001 From: litinsky Date: 
Wed, 9 Oct 2019 17:30:38 +0300 Subject: [PATCH 115/133] Added workaround to transport for bot annotator services --- core/config_parser.py | 11 ++++++-- core/transport/gateways/rabbitmq.py | 43 +++++++++++++++++------------ core/transport/messages.py | 4 ++- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 838ab4dd..ccab1841 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -22,6 +22,10 @@ def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None): on_channel_callback=on_channel_callback) +def add_bot_to_name(name): + return f'bot_{name}' + + def parse_old_config(): services = [] worker_tasks = [] @@ -69,9 +73,6 @@ def make_service_from_config_rec(conf_record, sess, state_processor_method, tags return _service, _worker_tasks, sess, gate - def add_bot_to_name(name): - return f'bot_{name}' - for anno in ANNOTATORS_1: service, workers, session, gateway = make_service_from_config_rec(anno, session, StateManager.add_annotation_dict, @@ -201,4 +202,8 @@ def get_service_gateway_config(service_name): gateway_config = deepcopy(TRANSPORT_SETTINGS) gateway_config['service'] = matching_config + # TODO think if we can remove this workaround for bot annotators + if service_name in [service['name'] for service in chain(ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3)]: + gateway_config['service']['names'] = [service_name, add_bot_to_name(service_name)] + return gateway_config diff --git a/core/transport/gateways/rabbitmq.py b/core/transport/gateways/rabbitmq.py index 7f2a9c56..60581c4c 100644 --- a/core/transport/gateways/rabbitmq.py +++ b/core/transport/gateways/rabbitmq.py @@ -143,7 +143,11 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: async def send_to_service(self, service_name: str, dialog: dict) -> None: task_uuid = str(uuid4()) - task = ServiceTaskMessage(agent_name=self._agent_name, task_uuid=task_uuid, dialog=dialog) + task = ServiceTaskMessage(agent_name=self._agent_name, + service_name=service_name, + task_uuid=task_uuid, + dialog=dialog) + logger.debug(f'Created task {task_uuid} to service {service_name} with dialog state: {str(dialog)}') message = Message(body=json.dumps(task.to_json()).encode('utf-8'), @@ -204,15 +208,18 @@ async def _setup_queues(self) -> None: self._in_queue = await self._agent_out_channel.declare_queue(name=in_queue_name, durable=True) logger.info(f'Declared service in queue: {in_queue_name}') - any_instance_routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=self._service_name) - await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=any_instance_routing_key) - logger.info(f'Queue: {in_queue_name} bound to routing key: {any_instance_routing_key}') + # TODO think if we can remove this workaround for bot annotators + service_names = self._config['service'].get('names', []) or [self._service_name] + for service_name in service_names: + any_instance_routing_key = SERVICE_ROUTING_KEY_TEMPLATE.format(service_name=service_name) + await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=any_instance_routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {any_instance_routing_key}') - this_instance_routing_key = SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE.format(service_name=self._service_name, - instance_id=self._instance_id) + this_instance_routing_key = SERVICE_INSTANCE_ROUTING_KEY_TEMPLATE.format(service_name=service_name, + instance_id=self._instance_id) - await 
self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=this_instance_routing_key) - logger.info(f'Queue: {in_queue_name} bound to routing key: {this_instance_routing_key}') + await self._in_queue.bind(exchange=self._agent_out_exchange, routing_key=this_instance_routing_key) + logger.info(f'Queue: {in_queue_name} bound to routing key: {this_instance_routing_key}') await self._agent_out_channel.set_qos(prefetch_count=self._batch_size * 2) @@ -253,8 +260,8 @@ async def _on_message_callback(self, message: IncomingMessage) -> None: self._infer_lock.release() async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: - task_agent_names_batch, task_uuids_batch, dialogs_batch = \ - zip(*[(task.agent_name, task.task_uuid, task.dialog) for task in tasks_batch]) + task_uuids_batch, dialogs_batch = \ + zip(*[(task.task_uuid, task.dialog) for task in tasks_batch]) logger.debug(f'Prepared for infering tasks {str(task_uuids_batch)}') @@ -266,7 +273,7 @@ async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: for i, response in enumerate(responses_batch): results_replies.append( - self._send_results(task_agent_names_batch[i], task_uuids_batch[i], dialogs_batch[i]['id'], response) + self._send_results(tasks_batch[i], response) ) await asyncio.gather(*results_replies) @@ -275,21 +282,21 @@ async def _process_tasks(self, tasks_batch: List[ServiceTaskMessage]) -> bool: except asyncio.TimeoutError: return False - async def _send_results(self, agent_name: str, task_uuid: str, dialog_id: str, response: dict) -> None: - result = ServiceResponseMessage(agent_name=agent_name, - task_uuid=task_uuid, - service_name=self._service_name, + async def _send_results(self, task: ServiceTaskMessage, response: Dict) -> None: + result = ServiceResponseMessage(agent_name=task.agent_name, + task_uuid=task.task_uuid, + service_name=task.service_name, service_instance_id=self._instance_id, - dialog_id=dialog_id, + dialog_id=task.dialog['id'], response=response) message = Message(body=json.dumps(result.to_json()).encode('utf-8'), delivery_mode=aio_pika.DeliveryMode.PERSISTENT, expiration=self._utterance_lifetime_sec) - routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=agent_name) + routing_key = AGENT_ROUTING_KEY_TEMPLATE.format(agent_name=task.agent_name) await self._agent_in_exchange.publish(message=message, routing_key=routing_key) - logger.debug(f'Sent response for task {str(task_uuid)} with routing key {routing_key}') + logger.debug(f'Sent response for task {str(task.task_uuid)} with routing key {routing_key}') class RabbitMQChannelGateway(RabbitMQTransportBase, ChannelGatewayBase): diff --git a/core/transport/messages.py b/core/transport/messages.py index 45ea9b50..8c7fc266 100644 --- a/core/transport/messages.py +++ b/core/transport/messages.py @@ -17,12 +17,14 @@ def to_json(self) -> dict: class ServiceTaskMessage(MessageBase): msg_type = 'service_task' agent_name: str + service_name: str task_uuid: str dialog: dict - def __init__(self, agent_name: str, task_uuid: str, dialog: dict) -> None: + def __init__(self, agent_name: str, service_name: str, task_uuid: str, dialog: dict) -> None: self.msg_type = self.__class__.msg_type self.agent_name = agent_name + self.service_name = service_name self.task_uuid = task_uuid self.dialog = dialog From d355d81f52588ea1943c3f009002efa6c6087c72 Mon Sep 17 00:00:00 2001 From: litinsky Date: Wed, 9 Oct 2019 18:11:37 +0300 Subject: [PATCH 116/133] Set default logging level to info --- log_config.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/log_config.yml b/log_config.yml index bdb1a11d..4e9f6ed4 100644 --- a/log_config.yml +++ b/log_config.yml @@ -2,7 +2,7 @@ version: 1 disable_existing_loggers: false loggers: core: - level: 'DEBUG' + level: 'INFO' propagate: true handlers: ['stderr'] service_logger: From dcdc09cdf3f3862f9cabc3083789b88184540c5a Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 11 Oct 2019 11:20:00 +0300 Subject: [PATCH 117/133] formatters for http output --- core/connectors.py | 2 +- core/run.py | 19 ++++++++++++++----- state_formatters/output_formatters.py | 17 +++++++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 state_formatters/output_formatters.py diff --git a/core/connectors.py b/core/connectors.py index 2eb4571c..e154dce1 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -82,7 +82,7 @@ def __init__(self, intermediate_storage: Dict, service_name: str): async def send(self, payload: Dict, callback: Callable): message_uuid = payload['message_uuid'] event = payload['event'] - response_text = payload['dialog']['utterances'][-1]['text'] + response_text = payload self.intermediate_storage[message_uuid] = response_text event.set() response_time = time.time() diff --git a/core/run.py b/core/run.py index 49ccded8..4c3fa217 100644 --- a/core/run.py +++ b/core/run.py @@ -14,6 +14,8 @@ from core.config_parser import parse_old_config from core.state_manager import StateManager +from state_formatters.output_formatters import http_api_output_formatter, http_debug_output_formatter + from aiogram import Bot from aiogram.utils import executor from aiogram.dispatcher import Dispatcher @@ -87,9 +89,12 @@ async def on_shutdown(app): await app['client_session'].close() -async def init_app(register_msg, intermediate_storage, on_startup, on_shutdown_func=on_shutdown): +async def init_app(register_msg, intermediate_storage, + on_startup, on_shutdown_func=on_shutdown, + debug=False): app = web.Application(debug=True) - handle_func = await api_message_processor(register_msg, intermediate_storage) + handle_func = await api_message_processor( + register_msg, intermediate_storage, debug) app.router.add_post('/', handle_func) app.router.add_get('/dialogs', users_dialogs) app.router.add_get('/dialogs/{dialog_id}', dialog) @@ -110,7 +115,7 @@ async def startup_background_tasks(app): return startup_background_tasks -async def api_message_processor(register_msg, intermediate_storage): +async def api_message_processor(register_msg, intermediate_storage, debug=False): async def api_handle(request): user_id = None bot_response = None @@ -134,8 +139,12 @@ async def api_handle(request): if bot_response is None: raise RuntimeError('Got None instead of a bot response.') + if debug: + response = http_debug_output_formatter(bot_response) + else: + response = http_api_output_formatter(bot_response) - return web.json_response({'user_id': user_id, 'response': bot_response}) + return web.json_response(response) return api_handle @@ -198,7 +207,7 @@ def main(): input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), - on_shutdown) + on_shutdown, args.debug) web.run_app(app, port=args.port) diff --git a/state_formatters/output_formatters.py b/state_formatters/output_formatters.py new file mode 100644 index 00000000..9069efb2 
--- /dev/null +++ b/state_formatters/output_formatters.py @@ -0,0 +1,17 @@ +from typing import Dict + + +def http_api_output_formatter(payload: Dict): + return { + 'user_id': payload['dialog']['human']['user_telegram_id'], + 'response': payload['dialog']['utterances'][-1]['text'], + } + + +def http_debug_output_formatter(payload: Dict): + return { + 'user_id': payload['dialog']['human']['user_telegram_id'], + 'response': payload['dialog']['utterances'][-1]['text'], + 'active_skill': payload['dialog']['utterances'][-1]['active_skill'], + 'debug_output': payload['dialog']['utterances'][-2]['hypotheses'] + } From 775d94f4ab433e5db3b873941ac94b1a3795b5c8 Mon Sep 17 00:00:00 2001 From: Pavel Pugin Date: Fri, 11 Oct 2019 12:33:26 +0300 Subject: [PATCH 118/133] fix comments --- core/agent.py | 8 ++++---- core/connectors.py | 6 ++++-- core/run.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/core/agent.py b/core/agent.py index 108cf15e..9b2deb96 100644 --- a/core/agent.py +++ b/core/agent.py @@ -47,8 +47,8 @@ def flush_record(self, dialog_id: str): def register_service_request(self, dialog_id: str, service_name): if dialog_id not in self.workflow.keys(): raise ValueError(f'dialog with id {dialog_id} is not exist in workflow') - self.workflow[dialog_id]['services'][service_name] = {'send': True, 'done': False, 'send_time': time(), - 'done_time': None} + self.workflow[dialog_id]['services'][service_name] = {'send': True, 'done': False, 'agent_send_time': time(), + 'agent_done_time': None} def get_services_status(self, dialog_id: str): if dialog_id not in self.workflow.keys(): @@ -71,7 +71,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res if service: service_data = workflow_record['services'][service_name] service_data['done'] = True - service_data['done_time'] = time() + service_data['agent_done_time'] = time() if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], @@ -98,7 +98,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res for service in next_services: if service.name not in selected_services: self.workflow[dialog_id]['services'][service.name] = {'done': True, 'send': False, - 'send_time': None, 'done_time': None} + 'agent_send_time': None, 'agent_done_time': None} else: result.append(service) next_services = result diff --git a/core/connectors.py b/core/connectors.py index 5189f24b..95491798 100644 --- a/core/connectors.py +++ b/core/connectors.py @@ -101,11 +101,12 @@ async def send(self, payload: Dict, callback: Callable): service_send_time = time.time() self.intermediate_storage[message_uuid] = response_text event.set() + service_response_time = time.time() await callback(dialog_id=payload['dialog']['id'], service_name=self.service_name, response=response_text, service_send_time=service_send_time, - service_response_time=time.time()) + service_response_time=service_response_time) class EventSetOutputConnector: @@ -118,8 +119,9 @@ async def send(self, payload: Dict, callback: Callable): if not event or not isinstance(event, asyncio.Event): raise ValueError("'event' key is not presented in payload") event.set() + service_response_time = time.time() await callback(dialog_id=payload['dialog']['id'], service_name=self.service_name, response=" ", service_send_time=service_send_time, - service_response_time=time.time()) + service_response_time=service_response_time) diff --git a/core/run.py 
b/core/run.py index 27e5124c..ebb0c7ad 100644 --- a/core/run.py +++ b/core/run.py @@ -32,8 +32,8 @@ def response_logger(workflow_record): for service_name, service_data in workflow_record['services'].items(): - done = service_data['done_time'] - send = service_data['send_time'] + done = service_data['agent_done_time'] + send = service_data['agent_send_time'] if not send or not done: continue logger.info(f'{service_name}\t{round(done - send, 5)}\tseconds') From 13ae8d0e95c122d669a47d0dfbf5cfe79e144c5a Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 11 Oct 2019 17:57:40 +0300 Subject: [PATCH 119/133] docs: state API version 0.12.0 --- core/__init__.py | 2 +- core/state_schema.py | 3 +- docs/source/_static/api.html | 2 +- docs/source/_static/apispec/agent_v0.12.0.yml | 409 ++++++++++++++++++ 4 files changed, 413 insertions(+), 3 deletions(-) create mode 100644 docs/source/_static/apispec/agent_v0.12.0.yml diff --git a/core/__init__.py b/core/__init__.py index 89e51f14..c8c8e77e 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1 +1 @@ -STATE_API_VERSION = "0.12" +STATE_API_VERSION = "0.12.0" diff --git a/core/state_schema.py b/core/state_schema.py index c9bc43fa..71ed7ad7 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -53,7 +53,8 @@ 'utterances': [], 'channel_type': None, 'human': None, - 'bot': None + 'bot': None, + 'version': STATE_API_VERSION } diff --git a/docs/source/_static/api.html b/docs/source/_static/api.html index 298f64c7..427c9941 100644 --- a/docs/source/_static/api.html +++ b/docs/source/_static/api.html @@ -13,7 +13,7 @@ - + \ No newline at end of file diff --git a/docs/source/_static/apispec/agent_v0.12.0.yml b/docs/source/_static/apispec/agent_v0.12.0.yml new file mode 100644 index 00000000..e37f3e6f --- /dev/null +++ b/docs/source/_static/apispec/agent_v0.12.0.yml @@ -0,0 +1,409 @@ +openapi: 3.0.1 +info: + title: DeepPavlov Agent Services REST API + version: 0.12.0 + description: >- + Agents built with DeepPavlov Agent communicate with their Services via HTTP, so + endpoints should be specified. +servers: + - url: 'http://localhost:{port}/' + description: Local development server + variables: + port: + default: '4242' +paths: + /: + get: + summary: Root path + responses: + '200': + description: Go to /apidocs/ to see graphical web UI for this API. + '/api/v0/{skill_endpoint}/': + post: + parameters: + - name: skill_endpoint + in: path + required: true + schema: + enum: + - model + summary: Generic skill endpoint + description: >- + An agent built with DeepPavlov Agent sends requests to the services endpoints in + order to retrieve the answers. + requestBody: + description: Description of the request to be executed + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RequestBodySchema' + examples: + general: + $ref: '#/components/examples/GenericRequestBody' + responses: + '200': + description: Request finished succesfully. + content: + application/json: + schema: + $ref: "#/components/schemas/ODQAResponse200Schema" + examples: + odqa: + $ref: "#/components/examples/ODQAResponse" + '404': + description: This skill doesn't exsits. +components: + schemas: + RequestBodySchema: + type: object + properties: + id: + description: REQUIRED. A unique id of the dialog. + type: string + location: + description: 'REQUIRED. A free-formatted location where the dialog is happening.' + type: string + utterances: + description: >- + REQUIRED. A list of all utterances of the dialog. 
The last utterance always belongs + to a human user. + type: array + items: + oneOf: + - $ref: '#/components/schemas/HumanUtterance' + - $ref: '#/components/schemas/BotUtterance' + human: + $ref: '#/components/schemas/Human' + bot: + $ref: '#/components/schemas/Bot' + channel_type: + description: >- + REQUIRED. A channel where the communication is happening. For example, "telegram", + "facebook", "http". + type: string + Human: + description: 'REQUIRED. A human user in the dialog.' + type: object + properties: + id: + description: REQUIRED. A unique is of the human user. + type: string + user_telegram_id: + description: REQUIRED. A unique Telegram id of the human user. + type: string + user_type: + description: REQUIRED. A user type. Here it is always “human”. + type: string + device_type: + description: >- + REQUIRED. A name of the device which is used by the user. For example, it can be "iphone" or "android". + type: string + persona: + description: REQUIRED. A persona of the human user. It is stored as an array of sentences characterizing the human user. By default this is an empty array. + type: array + items: + type: string + profile: + $ref: '#/components/schemas/Profile' + attributes: + description: Generic key-value attributes. + type: object + items: + type: object + Bot: + description: >- + REQUIRED. A bot user of the dialog. A bot is an agent with a particular skill set. + type: object + properties: + id: + description: REQUIRED. A unique is of the bot user. + type: string + user_type: + description: REQUIRED. A user type. Here it is always “human”. + type: string + persona: + description: REQUIRED. A persona of the bot user. It is stored as an array of sentences characterizing the human user. By default this is an empty array. + type: array + items: + type: string + attributes: + description: Generic key-value attributes. + type: object + items: + type: object + Profile: + description: REQUIRED. A personal information about the human user. + type: object + properties: + gender: + description: REQUIRED. A gender of the human user. + type: string + birthdate: + description: REQUIRED. Birthdate + type: string + format: date + name: + description: REQUIRED. A name of the human user. + type: string + location: + description: REQUIRED. A location of the human user. + type: object + home_coordinates: + description: REQUIRED. Home coordinates of the human user. + type: object + work_coordinates: + description: REQUIRED. Workplace coordinates of the human user. + type: object + occupation: + description: REQUIRED. A profession of the human user. + type: string + income_per_year: + description: REQUIRED. An income of the human user. + type: number + HumanUtterance: + description: RESUIRED. An utterance of the human user. + type: object + properties: + id: + type: string + description: REQUIRED. A unique id of the human utterance. + text: + type: string + description: >- + REQUIRED. Text of the human utterance. If this is the very first utterance of the dialog, + it has the "/start" value. + user: + $ref: '#/components/schemas/Human' + annotations: + $ref: '#/components/schemas/Annotations' + date_time: + type: string + format: datetime + description: REQUIRED. A time of the utterance receiving by the agent server. + hypotheses: + type: array + items: + type: object + description: >- + Response candidates to this particular Utterance, generated by Skills. + BotUtterance: + description: RESUIRED. An utterance of the bot user. 
+ type: object + properties: + id: + type: string + description: REQUIRED. A unique id of the bot utterance. + text: + type: string + description: >- + REQUIRED. Text of the bot utterance. + orig_text: + type: string + description: >- + An original reponse given by the skill which can be transformed later by ResponseSelector + or Postprocessor. If it was transformed, the transformed response goes to the "text" field + and the original response is stored to the "orig_text" field. The field has value None by default. + user: + $ref: '#/components/schemas/Bot' + annotations: + $ref: '#/components/schemas/Annotations' + date_time: + type: string + format: datetime + description: REQUIRED. A time of the utterance receiving by the agent server. + confidence: + type: number + description: Skill confidence in its response. + active_skill: + type: string + description: >- + A name of the skill which was responsible for the final bot response generation. + Annotations: + description: >- + REQUIRED. The utterances annotations, or tags. The default values of the field is an empty array: []. If the dialog starts with "/start" utterance, this utterance is not being annotated. + type: object + ODQAResponse200Unit: + description: >- + A list of skill responses. Each response here is a hypothetical response to the same human utterance. So s skill should generate a number of possible reponses for each incoming human utterance. + type: array + items: + type: object + properties: + text: + description: A text reponse of the skill. + type: string + confidence: + description: >- + Skill confidence in its reponse. + type: number + ODQAResponse200Schema: + description: >- + A batch of lists or skill responses. A skill should provide a list of hypothetical answers for each incoming human utterance. + properties: + responses: + type: array + items: + $ref: '#/components/schemas/ODQAResponse200Unit' + examples: + GenericRequestBody: + description: one exaustive example + value: + id: 5d9b755eb8cd280022907f27 + location: lab + utterances: + - id: 5d9b755eb8cd280022907f29 + text: Hello + user: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: vasily + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + annotations: + ner: + tokens: + - Hello + tags: + - O + date_time: '2019-10-07 20:26:54.409000' + hypotheses: + - skill_name: chitchat + text: Hi! + confidence: 0.6 + - skill_name: odqa + text: to my friends + confidence: 0.23 + - id: 5d9b755eb8cd280022907f28 + active_skill: chitchat + confidence: 0.6 + text: Hi! + orig_text: None + user: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + annotations: + bot_ner: + tokens: + - Hi + - '!' + tags: + - O + - O + date_time: '2019-10-07 20:26:54.856000' + - id: 5d9b7565b8cd280022907f2b + text: What is your name? + user: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: к5698 + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + annotations: + ner: + tokens: + - What + - is + - your + - name + - '?' + tags: + - O + - O + - O + - O + - O + date_time: '2019-10-07 20:27:01.193000' + hypotheses: + - skill_name: chitchat + text: My name is DeepPavlov Agent! 
+ confidence: 0.9 + - skill_name: odqa + text: Alexander the Great + confidence: 0.5 + - id: 5d9b7565b8cd280022907f2a + active_skill: chitchat + confidence: 0.6 + text: My name is DeepPavlov Agent! + orig_text: None + user: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + annotations: + bot_ner: + tokens: + - My + - name + - is + - DeepPavlov + - Agent + - '!' + tags: + - O + - O + - O + - O + - O + - O + date_time: '2019-10-07 20:27:01.367000' + channel_type: cmd_client + human: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: к5698 + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + bot: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + version: 0.12.0 + ODQAResponse: + description: An example of Open Domain Question Answering (ODQA) skill. + value: + responses: + - + - text: Peter the Great was born at 1672. + confidence: 0.947 + - text: at 1672 + confidence: 0.998 + - + - text: The Earth population is 7 billions. + confidence: 0.3333 + - text: 7 billions + confidence: 0.36 \ No newline at end of file From 0f2f352c0b73ebad17bbe7d5053664101f996a11 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 11 Oct 2019 18:14:47 +0300 Subject: [PATCH 120/133] fix: State API title --- docs/source/_static/apispec/agent_v0.12.0.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/_static/apispec/agent_v0.12.0.yml b/docs/source/_static/apispec/agent_v0.12.0.yml index e37f3e6f..5205a86a 100644 --- a/docs/source/_static/apispec/agent_v0.12.0.yml +++ b/docs/source/_static/apispec/agent_v0.12.0.yml @@ -1,6 +1,6 @@ openapi: 3.0.1 info: - title: DeepPavlov Agent Services REST API + title: DeepPavlov Agent State API version: 0.12.0 description: >- Agents built with DeepPavlov Agent communicate with their Services via HTTP, so From 454fdd8c7c32d317c2de660400360af2a370300b Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 11 Oct 2019 18:59:43 +0300 Subject: [PATCH 121/133] style: minor fixes --- core/pipeline.py | 3 +-- core/run.py | 8 +++----- state_formatters/dp_formatters.py | 18 +++++++++--------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/core/pipeline.py b/core/pipeline.py index 87cc3442..c4fe85e0 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -1,5 +1,4 @@ from collections import defaultdict, Counter -from core.service import Service class Pipeline: @@ -33,7 +32,7 @@ def process_service_names(self): self.services[name_prev_service].next_services.add(service) return wrong_names # wrong names means that some service_names, used in previous services don't exist - def get_next_services(self, done: set = None, waiting: set =None): + def get_next_services(self, done: set = None, waiting: set = None): if done is None: done = set() if waiting is None: diff --git a/core/run.py b/core/run.py index c9966037..42a8df9c 100644 --- a/core/run.py +++ b/core/run.py @@ -157,14 +157,12 @@ async def dialog(request): if dialog_id == 'all': dialogs = Dialog.objects() return web.json_response([i.to_dict() for i in dialogs]) - elif len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id): + if len(dialog_id) == 24 and all(c in hexdigits for c in dialog_id): d = Dialog.objects(id__exact=dialog_id) if not d: raise web.HTTPNotFound(reason=f'dialog with id {dialog_id} is not exist') - else: - return 
web.json_response(d[0].to_dict()) - else: - raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') + return web.json_response(d[0].to_dict()) + raise web.HTTPBadRequest(reason='dialog id should be 24-character hex string') def main(): diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index e0cd0330..5c9877df 100644 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -66,14 +66,14 @@ def base_skill_output_formatter(payload): def base_annotator_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': return payload def ner_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': return {'tokens': payload[0], 'tags': payload[1]} @@ -81,14 +81,14 @@ def ner_formatter(payload: Any, model_args_names=('x',), mode='in'): def sentiment_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': return [el for el in payload] def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': class_name = payload[0] if class_name in ['speech', 'negative']: response = ['chitchat'] @@ -100,17 +100,17 @@ def chitchat_odqa_formatter(payload: Any, model_args_names=('x',), mode='in'): def odqa_formatter(payload: Any, model_args_names=('question_raw',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': return [{"text": payload[0], - "confidence": 0.5}] + "confidence": 0.5}] def chitchat_formatter(payload: Any, model_args_names=('q',), mode='in'): if mode == 'in': return last_utterances(payload, model_args_names) - elif mode == 'out': + if mode == 'out': return [{"text": payload[0], - "confidence": 0.5}] + "confidence": 0.5}] def chitchat_example_formatter(payload: Any, @@ -122,7 +122,7 @@ def chitchat_example_formatter(payload: Any, model_args_names[1]: parsed['last_annotations'], model_args_names[2]: parsed['utterances_histories'], model_args_names[3]: parsed['dialogs']} - elif mode == 'out': + if mode == 'out': return {"text": payload[0], "confidence": payload[1], "name": payload[2]} From e7cda0b5008019c09b09648cc5baeac351262f1b Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Fri, 11 Oct 2019 19:51:44 +0300 Subject: [PATCH 122/133] style: fixes --- core/pipeline.py | 2 +- core/run.py | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/core/pipeline.py b/core/pipeline.py index 44015850..b9e050c9 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -61,7 +61,7 @@ def process_service_names(self): self.services[name_prev_service].next_services.add(service) return wrong_names # wrong names means that some service_names, used in previous services don't exist - def get_next_services(self, done: set = None, waiting: set =None): + def get_next_services(self, done: set = None, waiting: set = None): if done is None: done = set() if waiting is None: diff --git a/core/run.py b/core/run.py index 4c3fa217..ebd732fa 100644 --- a/core/run.py +++ b/core/run.py @@ -1,33 +1,29 @@ -import asyncio +import logging import argparse import uuid -import logging - -from aiohttp 
import web from datetime import datetime from string import hexdigits from os import getenv +import asyncio +from aiohttp import web +from aiogram import Bot +from aiogram.utils import executor +from aiogram.dispatcher import Dispatcher + from core.agent import Agent from core.pipeline import Pipeline, Service from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config from core.state_manager import StateManager - from state_formatters.output_formatters import http_api_output_formatter, http_debug_output_formatter -from aiogram import Bot -from aiogram.utils import executor -from aiogram.dispatcher import Dispatcher - - logger = logging.getLogger('service_logger') logger.setLevel(logging.INFO) fh = logging.FileHandler('../service.log') fh.setLevel(logging.INFO) logger.addHandler(fh) - parser = argparse.ArgumentParser() parser.add_argument("-ch", "--channel", help="run agent in telegram, cmd_client or http_client", type=str, choices=['cmd_client', 'http_client', 'telegram'], default='cmd_client') @@ -90,8 +86,8 @@ async def on_shutdown(app): async def init_app(register_msg, intermediate_storage, - on_startup, on_shutdown_func=on_shutdown, - debug=False): + on_startup, on_shutdown_func=on_shutdown, + debug=False): app = web.Application(debug=True) handle_func = await api_message_processor( register_msg, intermediate_storage, debug) @@ -117,8 +113,7 @@ async def startup_background_tasks(app): async def api_message_processor(register_msg, intermediate_storage, debug=False): async def api_handle(request): - user_id = None - bot_response = None + response = None if request.method == 'POST': if request.headers.get('content-type') != 'application/json': raise web.HTTPBadRequest(reason='Content-Type should be application/json') From 80fbe20f7e57eed7eab3829e7e2d51d53a3296a6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 14 Oct 2019 14:39:10 +0300 Subject: [PATCH 123/133] fix: syntax errors and style after merge --- core/config_parser.py | 4 +--- core/run.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/core/config_parser.py b/core/config_parser.py index 2605c553..ad9f132e 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -15,9 +15,7 @@ from core import gateways_map -def prepare_agent_gateway(on_channel_callback=None, on_service_callb - - ack=None): +def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None): transport_type = TRANSPORT_SETTINGS['transport']['type'] gateway_cls = gateways_map[transport_type]['agent'] return gateway_cls(config=TRANSPORT_SETTINGS, diff --git a/core/run.py b/core/run.py index 3d816a91..bfd6ddb4 100644 --- a/core/run.py +++ b/core/run.py @@ -1,7 +1,6 @@ import logging import argparse import uuid -import logging from datetime import datetime from string import hexdigits from os import getenv @@ -214,10 +213,9 @@ def run_default(): register_msg, process_callable = prepare_agent(services, endpoint, input_srv, args.response_logger) if gateway: gateway.on_channel_callback = register_msg - gateway.on_service_callback = process + gateway.on_service_callback = process_callable app = init_app(register_msg, intermediate_storage, prepare_startup(workers, process_callable, session), on_shutdown, args.debug) - web.run_app(app, port=args.port) elif CHANNEL == 'telegram': From 589ffacad583b8f78beee57871d49fd18bfe4ca1 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 14 Oct 2019 16:53:50 +0300 Subject: [PATCH 124/133] fix: imports and init_agent() 
--- utils/agent_batch_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/agent_batch_test.py b/utils/agent_batch_test.py index b8d0cc82..a28ba720 100644 --- a/utils/agent_batch_test.py +++ b/utils/agent_batch_test.py @@ -7,7 +7,8 @@ from core.agent import Agent from core.state_manager import StateManager -from core.pipeline import Pipeline, Service +from core.pipeline import Pipeline +from core.service import Service from core.config_parser import parse_old_config from core.connectors import EventSetOutputConnector @@ -21,7 +22,7 @@ def init_agent(): - services, workers, session = parse_old_config() + services, workers, session, _ = parse_old_config() endpoint = Service('cmd_responder', EventSetOutputConnector('cmd_responder').send, StateManager.save_dialog_dict, 1, ['responder']) input_srv = Service('input', None, StateManager.add_human_utterance_simple_dict, 1, ['input']) From a300714458a2b8c75efed07665f58d8d9492e732 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Mon, 14 Oct 2019 19:52:38 +0300 Subject: [PATCH 125/133] docs: update config variables, formatters, user commands docs --- docs/source/api/user_state_api.rst | 23 ++------ docs/source/index.rst | 14 ++++- docs/source/intro/overview.rst | 19 +++++-- docs/source/state_formatters/formatters.rst | 63 +++++++++++++++++++++ docs/source/user_commands/commands.rst | 4 ++ 5 files changed, 100 insertions(+), 23 deletions(-) create mode 100644 docs/source/state_formatters/formatters.rst create mode 100644 docs/source/user_commands/commands.rst diff --git a/docs/source/api/user_state_api.rst b/docs/source/api/user_state_api.rst index 3fd0e0b3..492a4a8e 100644 --- a/docs/source/api/user_state_api.rst +++ b/docs/source/api/user_state_api.rst @@ -4,14 +4,9 @@ User State API Each utterance in a **Dialog** is generated either by a **Human** or by a **Bot**. To understand, which of two has generated the utterance, refer to the ``user.user_type`` field: - .. code:: json + .. code:: javascript - "utterances": [ - { - "user": { - "user_type": "human" - } - }] + "utterances": [{"user": {"user_type": "human"}}] A `Skill `__ can update any fields in **User** (**Human** or **Bot**) objects. If a **Skill** updates a **Human**, the **Human** fields will be changed in this utterance accordingly. If a **Skill** updates a **Bot**, the **Bot** fields will be @@ -20,17 +15,11 @@ changed in the *next* (generated by the bot) utterance. Each new dialog starts with a new **Bot** with all default fields. However, the **Human** object is updated permanently, and when a **Human** starts a new dialog, the object is retrieved from a database with all updated fields. -The history of all changes made by skills to users can be looked up at the selected skills responses -in the ``selected_skills`` field of a human utterance: +The history of all changes made by skills to users can be looked up at the list of possible responses in the +``hypotheses`` field of a human utterance: - .. code:: json + .. code:: javascript - "utterances": [ - { - "user": { - "user_type": "human" - }, - "selected_skills": {} - }] + "utterances": [{"user": {"user_type": "human"}, "hypotheses": []}] .. _skill: https://deeppavlov-agent.readthedocs.io/en/latest/api/services_http_api.html#skill diff --git a/docs/source/index.rst b/docs/source/index.rst index ba860985..43715d80 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -22,4 +22,16 @@ Welcome to DeepPavlov Agent documentation! 
:maxdepth: 2 :caption: User State API - api/user_state_api \ No newline at end of file + api/user_state_api + +.. toctree:: + :maxdepth: 2 + :caption: User Commands + + user_commands/commands + +.. toctree:: + :maxdepth: 2 + :caption: State Formatters + + state_formatters/formatters \ No newline at end of file diff --git a/docs/source/intro/overview.rst b/docs/source/intro/overview.rst index 29e602b9..49aebd57 100644 --- a/docs/source/intro/overview.rst +++ b/docs/source/intro/overview.rst @@ -83,13 +83,15 @@ change anything. * A port on a service host machine * **endpoint** * A service URL endpoint, **"/skill"** by default +* **url** (optional) + * A service url. By default it is generated from **protocol + host + port + endpoint** * **path** * A path to the agent service config file, currently valid only for DeepPavlov skills * **env** * Environment variables dictionary -* **external** +* **external** (optional) * If the service is running from the **dp-agent** repo. **False** by default. -* **dockerfile** +* **dockerfile** (optional) * Specify a dockerfile name available inside the Agent repo. **"dockerfile_skill_cpu"** by default. Available options: @@ -99,9 +101,13 @@ change anything. * **formatter** * The name of a function that converts the Agent state into a service input format and converts a service output format into the Agent state +* **batch_size** (optional) + A size of input batch for the services. By default it's always 1, but for neural services it is usually makes more + sense to increase it for better performance. -Notice that if you want to run an Agent from only one skill, you can leave -**SKILL_SELECTORS** and **RESPONSE_SELECTORS** empty. + +Notice that you can leave **SKILL_SELECTORS** and **RESPONSE_SELECTORS** empty. If you do so, all +skills are selected at each user utterance and the final response is selected by the skills' confidence. Also you can include in the Agent configuration any external service running on some other machine. @@ -275,7 +281,10 @@ Agent can run both from container and from a local machine. The default Agent po "response": "phrase, which were generated by skills in order to respond" } - In case of wrong format, HTTP errors will be returned + In case of wrong format, HTTP errors will be returned. + + If you need the Agent server to return something different than ``user_id`` and ``reponse``, try the + :ref:`output formatters `. 3. In addition to everything else the HTTP api server allows viewing dialogs in the database through GET requests. The result is returned in json format which can be easily prettifyed with various browser extensions. diff --git a/docs/source/state_formatters/formatters.rst b/docs/source/state_formatters/formatters.rst new file mode 100644 index 00000000..f40391b8 --- /dev/null +++ b/docs/source/state_formatters/formatters.rst @@ -0,0 +1,63 @@ +**Formatters** are the functions that allow converting the input and output API of services into Agent's API. +In the provided example `configuraton file `__ you can find that each service has its own formatter +function: + +.. code:: python + + { + "name": "odqa", + "formatter": odqa_formatter + } + +DeepPavlov Formatters +===================== + +The pre-built DeepPavlov formatters exist for demonstration purposes. These formatters have three attributes: + + * **payload** + + If ``mode==in`` **payload** is a batch of input states (dialogs) to the model. 
If the model can't accept a list of
+    dialog dictionaries, they can be formatted into something else, for example into a batch of last utterances from
+    each dialog.
+
+    If ``mode==out`` **payload** is the result returned by the DeepPavlov model. Unlike ``in`` mode, here we format
+    *a single element* of the batch of results returned by the model. The same formatting is applied to all other elements of
+    the batch. Here the result should be formatted according to the Agent's :doc:`Services HTTP API <../api/services_http_api>`.
+
+  * **model_args_names**
+
+    Should be the same names as the particular DeepPavlov model config accepts.
+
+  * **mode**
+
+    Can be ``in`` or ``out``. In the ``in`` mode we format everything that goes from the Agent to the service. In the ``out``
+    mode we format everything that goes from the service to the Agent.
+
+.. _output-formatters:
+
+Output Formatters
+=================
+
+Output Formatters let you control what the Agent's HTTP server returns. By default the
+Agent's server returns only a bot utterance and a user id:
+
+.. code:: javascript
+
+    {
+      "user_id": "same user id as in request",
+      "response": "phrase, which were generated by skills in order to respond"
+    }
+
+But if you need the server to return some additional information, for example the name of the active skill, you can do
+the following:
+
+  * Edit the ``http_debug_output_formatter()`` function in ``state_formatters/output_formatters.py``. It accepts the whole Agent's state
+    as the ``payload`` argument, so anything available in the state can be extracted from ``payload``.
+
+  * Run ``run.py`` with the ``debug==True`` option.
+
+
+.. _config_file: https://github.com/deepmipt/dp-agent/blob/master/config.py
+.. _dp_formatters: https://github.com/deepmipt/dp-agent/blob/master/state_formatters/dp_formatters.py
+
+
diff --git a/docs/source/user_commands/commands.rst b/docs/source/user_commands/commands.rst
new file mode 100644
index 00000000..72560445
--- /dev/null
+++ b/docs/source/user_commands/commands.rst
@@ -0,0 +1,4 @@
+/start
+======
+
+To start a new dialog, send the **"/start"** utterance to the bot.
\ No newline at end of file From 96e66cbf2c0c8904acd9e5e699e54c28ab92bb93 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 15 Oct 2019 14:45:30 +0300 Subject: [PATCH 126/133] fix: imports and interfaces --- core/__init__.py | 15 --------- core/config_parser.py | 6 ++-- core/run.py | 7 +++-- core/transport/mapping.py | 14 +++++++++ core/transport/messages.py | 31 ++++++++----------- .../{transport_settings.py => settings.py} | 0 6 files changed, 34 insertions(+), 39 deletions(-) create mode 100644 core/transport/mapping.py rename core/transport/{transport_settings.py => settings.py} (100%) diff --git a/core/__init__.py b/core/__init__.py index 987b1753..facd58b8 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,20 +1,5 @@ -from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway -from core.connectors import ServiceGatewayHTTPConnector from core.log import init_logger - STATE_API_VERSION = "0.12.0" init_logger() - -gateways_map = { - 'AMQP': { - 'agent': RabbitMQAgentGateway, - 'service': RabbitMQServiceGateway, - 'channel': RabbitMQChannelGateway - } -} - -connectors_map = { - 'AMQP': ServiceGatewayHTTPConnector -} diff --git a/core/config_parser.py b/core/config_parser.py index ad9f132e..60f04e2a 100644 --- a/core/config_parser.py +++ b/core/config_parser.py @@ -6,18 +6,18 @@ from core.transform_config import SKILLS, ANNOTATORS_1, ANNOTATORS_2, ANNOTATORS_3, SKILL_SELECTORS, \ RESPONSE_SELECTORS, POSTPROCESSORS -from core.transport.transport_settings import TRANSPORT_SETTINGS from core.connectors import HTTPConnector, ConfidenceResponseSelectorConnector, AioQueueConnector, \ QueueListenerBatchifyer, AgentGatewayToServiceConnector from core.pipeline import simple_workflow_formatter from core.service import Service from core.state_manager import StateManager -from core import gateways_map +from core.transport.settings import TRANSPORT_SETTINGS def prepare_agent_gateway(on_channel_callback=None, on_service_callback=None): + from core.transport.mapping import GATEWAYS_MAP transport_type = TRANSPORT_SETTINGS['transport']['type'] - gateway_cls = gateways_map[transport_type]['agent'] + gateway_cls = GATEWAYS_MAP[transport_type]['agent'] return gateway_cls(config=TRANSPORT_SETTINGS, on_service_callback=on_service_callback, on_channel_callback=on_channel_callback) diff --git a/core/run.py b/core/run.py index e3fda9ac..56e6f636 100644 --- a/core/run.py +++ b/core/run.py @@ -17,7 +17,6 @@ from core.connectors import EventSetOutputConnector, HttpOutputConnector from core.config_parser import parse_old_config, get_service_gateway_config from core.state_manager import StateManager -from core import gateways_map, connectors_map from state_formatters.output_formatters import http_api_output_formatter, http_debug_output_formatter @@ -248,17 +247,19 @@ def run_agent(): def run_service(): + from core.transport.mapping import GATEWAYS_MAP, CONNECTORS_MAP + service_name = args.service_name gateway_config = get_service_gateway_config(service_name) service_config = gateway_config['service'] formatter = service_config['formatter'] connector_type = service_config['protocol'] - connector_cls = connectors_map[connector_type] + connector_cls = CONNECTORS_MAP[connector_type] connector = connector_cls(service_config=service_config, formatter=formatter) transport_type = gateway_config['transport']['type'] - gateway_cls = gateways_map[transport_type]['service'] + gateway_cls = GATEWAYS_MAP[transport_type]['service'] gateway = 
gateway_cls(config=gateway_config, to_service_callback=connector.send_to_service) loop = asyncio.get_event_loop() diff --git a/core/transport/mapping.py b/core/transport/mapping.py new file mode 100644 index 00000000..bb9c568f --- /dev/null +++ b/core/transport/mapping.py @@ -0,0 +1,14 @@ +from core.transport.gateways.rabbitmq import RabbitMQAgentGateway, RabbitMQServiceGateway, RabbitMQChannelGateway +from core.connectors import ServiceGatewayHTTPConnector + +GATEWAYS_MAP = { + 'AMQP': { + 'agent': RabbitMQAgentGateway, + 'service': RabbitMQServiceGateway, + 'channel': RabbitMQChannelGateway + } +} + +CONNECTORS_MAP = { + 'AMQP': ServiceGatewayHTTPConnector +} diff --git a/core/transport/messages.py b/core/transport/messages.py index 8c7fc266..6c589b16 100644 --- a/core/transport/messages.py +++ b/core/transport/messages.py @@ -1,10 +1,14 @@ -from typing import TypeVar, Any +from typing import TypeVar, Any, Dict class MessageBase: + + def __init__(self, msg_type: str, agent_name: str): + self.msg_type = msg_type + self.agent_name = agent_name + @classmethod def from_json(cls, message_json): - message_json.pop('msg_type') return cls(**message_json) def to_json(self) -> dict: @@ -15,22 +19,19 @@ def to_json(self) -> dict: class ServiceTaskMessage(MessageBase): - msg_type = 'service_task' agent_name: str service_name: str task_uuid: str - dialog: dict + dialog: Dict - def __init__(self, agent_name: str, service_name: str, task_uuid: str, dialog: dict) -> None: - self.msg_type = self.__class__.msg_type - self.agent_name = agent_name + def __init__(self, agent_name: str, service_name: str, task_uuid: str, dialog: Dict) -> None: + super().__init__('service_task', agent_name) self.service_name = service_name self.task_uuid = task_uuid self.dialog = dialog class ServiceResponseMessage(MessageBase): - msg_type = 'service_response' agent_name: str task_uuid: str service_name: str @@ -40,9 +41,7 @@ class ServiceResponseMessage(MessageBase): def __init__(self, agent_name: str, task_uuid: str, service_name: str, service_instance_id: str, dialog_id: str, response: Any) -> None: - - self.msg_type = self.__class__.msg_type - self.agent_name = agent_name + super().__init__('service_response', agent_name) self.task_uuid = task_uuid self.service_name = service_name self.service_instance_id = service_instance_id @@ -51,22 +50,19 @@ def __init__(self, agent_name: str, task_uuid: str, service_name: str, service_i class ToChannelMessage(MessageBase): - msg_type = 'to_channel_message' agent_name: str channel_id: str user_id: str response: str def __init__(self, agent_name: str, channel_id: str, user_id: str, response: str) -> None: - self.msg_type = self.__class__.msg_type - self.agent_name = agent_name + super().__init__('to_channel_message', agent_name) self.channel_id = channel_id self.user_id = user_id self.response = response class FromChannelMessage(MessageBase): - msg_type = 'from_channel_message' agent_name: str channel_id: str user_id: str @@ -74,8 +70,7 @@ class FromChannelMessage(MessageBase): reset_dialog: bool def __init__(self, agent_name: str, channel_id: str, user_id: str, utterance: str, reset_dialog: bool) -> None: - self.msg_type = self.__class__.msg_type - self.agent_name = agent_name + super().__init__('from_channel_message', agent_name) self.channel_id = channel_id self.user_id = user_id self.utterance = utterance @@ -91,7 +86,7 @@ def __init__(self, agent_name: str, channel_id: str, user_id: str, utterance: st def get_transport_message(message_json: dict) -> TMessageBase: - message_type = 
message_json['msg_type'] + message_type = message_json.pop('msg_type') if message_type not in _message_wrappers_map: raise ValueError(f'Unknown transport message type: {message_type}') diff --git a/core/transport/transport_settings.py b/core/transport/settings.py similarity index 100% rename from core/transport/transport_settings.py rename to core/transport/settings.py From 69f350ddb481290cf6686fe6f8c65213e10e695e Mon Sep 17 00:00:00 2001 From: ignatov Date: Tue, 15 Oct 2019 16:42:33 +0300 Subject: [PATCH 127/133] fix: added keep_alive_timeout to prevent ConnectionResetError --- dp/dockerfile_skill_cpu | 1 + dp/dockerfile_skill_gpu | 1 + 2 files changed, 2 insertions(+) diff --git a/dp/dockerfile_skill_cpu b/dp/dockerfile_skill_cpu index 4188b57f..195b748c 100644 --- a/dp/dockerfile_skill_cpu +++ b/dp/dockerfile_skill_cpu @@ -17,5 +17,6 @@ ENV PYTHONPATH "${PYTONPATH}:/dp-agent" RUN python -m deeppavlov install $CONFIG RUN python dp/dp_server_config.py +RUN sed -i "/uvicorn.run/s/app,/app, timeout_keep_alive=20,/g" "/base/DeepPavlov/deeppavlov/utils/server/server.py" ENTRYPOINT python -m deeppavlov riseapi $CONFIG -p $PORT -d \ No newline at end of file diff --git a/dp/dockerfile_skill_gpu b/dp/dockerfile_skill_gpu index 8b95c2ea..72e2e83d 100644 --- a/dp/dockerfile_skill_gpu +++ b/dp/dockerfile_skill_gpu @@ -18,5 +18,6 @@ ENV PYTHONPATH "${PYTONPATH}:/dp-agent" RUN pip install -r /base/DeepPavlov/deeppavlov/requirements/tf-gpu.txt RUN python -m deeppavlov install $CONFIG RUN python dp/dp_server_config.py +RUN sed -i "/uvicorn.run/s/app,/app, timeout_keep_alive=20,/g" "/base/DeepPavlov/deeppavlov/utils/server/server.py" ENTRYPOINT python -m deeppavlov riseapi $CONFIG -p $PORT -d \ No newline at end of file From 3cb25aaaa07ee0d3d7dc8a114fd45dbaa668a5bd Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 15 Oct 2019 19:00:31 +0300 Subject: [PATCH 128/133] refactor: remove redundant state_manager_test --- tests/state_manager_test.py | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 tests/state_manager_test.py diff --git a/tests/state_manager_test.py b/tests/state_manager_test.py deleted file mode 100644 index 8c87e778..00000000 --- a/tests/state_manager_test.py +++ /dev/null @@ -1,4 +0,0 @@ -from core.state_manager import StateManager - -sm = StateManager() -sm.get_or_create_users() From a1c757466c2be27c1a3195a5924f1b91f8f24511 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Tue, 15 Oct 2019 19:00:58 +0300 Subject: [PATCH 129/133] feat: utterance field in HumanUtterance --- core/agent.py | 13 ++++++++----- core/run.py | 15 ++++++++++----- core/state_manager.py | 13 ++++++++----- core/state_schema.py | 6 +++++- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/core/agent.py b/core/agent.py index 934a48b8..08426530 100644 --- a/core/agent.py +++ b/core/agent.py @@ -2,7 +2,7 @@ from collections import defaultdict from time import time -from typing import Any, Optional, Callable, Hashable +from typing import Any, Optional, Callable, Hashable, Dict from core.pipeline import Pipeline from core.state_manager import StateManager @@ -75,9 +75,10 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res if response and service.state_processor_method: service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], - payload=response) + payload=response, + message_attrs=kwargs.get('message_attrs', {})) - # passing kwargs to services record + # passing kwargs to services record if not 
set(service_data.keys()).intersection(set(kwargs.keys())): service_data.update(kwargs) @@ -121,13 +122,15 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, kwargs['event'] = event self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=True, **kwargs) self.register_service_request(str(dialog.id), 'input') - await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance) + await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance, + message_attrs=kwargs.get('message_attrs', {})) await event.wait() return self.flush_record(str(dialog.id)) else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) self.register_service_request(str(dialog.id), 'input') - await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance) + await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance, + message_attrs=kwargs.get('message_attrs', {})) async def process(self, dialog_id, service_name=None, response: Any = None, **kwargs): workflow_record = self.get_workflow_record(dialog_id) diff --git a/core/run.py b/core/run.py index 56e6f636..8dc41d17 100644 --- a/core/run.py +++ b/core/run.py @@ -121,17 +121,22 @@ async def api_handle(request): if request.headers.get('content-type') != 'application/json': raise web.HTTPBadRequest(reason='Content-Type should be application/json') data = await request.json() - user_id = data.get('user_id') - payload = data.get('payload', '') + user_id = data.pop('user_id') + payload = data.pop('payload', '') if not user_id: raise web.HTTPBadRequest(reason='user_id key is required') event = asyncio.Event() message_uuid = uuid.uuid3(uuid.NAMESPACE_DNS, f'{user_id}{payload}{datetime.now()}').hex - await register_msg(utterance=payload, user_telegram_id=user_id, user_device_type='http', - date_time=datetime.now(), location='', channel_type=CHANNEL, - event=event, message_uuid=message_uuid) + await register_msg(utterance=payload, user_telegram_id=user_id, + user_device_type=data.pop('user_device_type', 'http'), + date_time=datetime.now(), + location=data.pop('location', ''), + channel_type=CHANNEL, + event=event, + message_uuid=message_uuid, + message_attrs=data) await event.wait() bot_response = intermediate_storage.pop(message_uuid) diff --git a/core/state_manager.py b/core/state_manager.py index 9cfe4d8b..06670958 100644 --- a/core/state_manager.py +++ b/core/state_manager.py @@ -42,12 +42,13 @@ def create_new_bot(persona: Optional[List[str]] = None): @staticmethod def create_new_human_utterance(text, user: Human, date_time, annotations=None, - hypotheses=None): + hypotheses=None, message_attributes=None): utt = HumanUtterance(text=text, user=user.to_dict(), date_time=date_time, annotations=annotations or HumanUtterance.annotations.default, - hypotheses=hypotheses or HumanUtterance.hypotheses.default) + hypotheses=hypotheses or HumanUtterance.hypotheses.default, + attributes=message_attributes or HumanUtterance.attributes.default) utt.save() return utt @@ -93,9 +94,10 @@ def get_or_create_dialog(cls, user, location, channel_type, should_reset=False): @classmethod def add_human_utterance(cls, dialog: Dialog, user: Human, text: str, date_time: datetime, annotation: Optional[dict] = None, - hypothesis: Optional[dict] = None) -> None: + hypothesis: Optional[dict] = None, + message_attrs: Optional[dict] = None) -> None: utterance = cls.create_new_human_utterance(text, user, date_time, annotation, - 
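api_handle now pops the keys it understands and forwards whatever is left of the JSON body as message attributes. A pure-function sketch of that unpacking; split_api_request is an illustrative helper, and the None default on the user_id pop keeps a missing key on the intended bad-request path instead of raising KeyError.

.. code:: python

    # Known keys are popped; the leftovers travel on as message_attrs.
    from typing import Any, Dict, Tuple


    def split_api_request(data: Dict[str, Any]) -> Tuple[str, str, str, str, Dict[str, Any]]:
        user_id = data.pop('user_id', None)   # default avoids KeyError before the check
        if not user_id:
            raise ValueError('user_id key is required')
        payload = data.pop('payload', '')
        user_device_type = data.pop('user_device_type', 'http')
        location = data.pop('location', '')
        return user_id, payload, user_device_type, location, data  # leftovers -> attrs


    user_id, payload, device, location, attrs = split_api_request(
        {'user_id': 'xyz', 'payload': 'hello', 'my_custom_dialog_id': 111})
    assert attrs == {'my_custom_dialog_id': 111}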
hypothesis) + hypothesis, message_attrs) dialog.utterances.append(utterance) dialog.save() @@ -183,6 +185,7 @@ def add_human_utterance_simple_dict(cls, dialog: Dict, dialog_object: Dialog, pa utterance['text'] = payload utterance['date_time'] = str(datetime.now()) utterance['user'] = dialog['human'] + utterance['attributes'] = kwargs.get('message_attrs', {}) dialog['utterances'].append(utterance) @staticmethod @@ -236,7 +239,7 @@ def add_text_dict(dialog: Dict, payload: str): dialog['utterances'][-1]['text'] = payload @staticmethod - def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None): + def save_dialog_dict(dialog: Dict, dialog_object: Dialog, payload=None, **kwargs): utt_objects = [] for utt in dialog['utterances'][::-1]: if not utt['id']: diff --git a/core/state_schema.py b/core/state_schema.py index 71ed7ad7..1c27fafd 100644 --- a/core/state_schema.py +++ b/core/state_schema.py @@ -10,6 +10,7 @@ 'annotations': {}, 'date_time': None, 'hypotheses': [], + 'attributes': {} } BOT_UTTERANCE_SCHEMA = { @@ -133,6 +134,7 @@ def make_from_dict(self, *args, **kwargs): class HumanUtterance(Utterance): hypotheses = ListField(default=[]) + attributes = DictField(default={}) def to_dict(self): return { @@ -141,7 +143,8 @@ def to_dict(self): 'user': self.user, 'annotations': self.annotations, 'date_time': str(self.date_time), - 'hypotheses': self.hypotheses + 'hypotheses': self.hypotheses, + 'attributes': self.attributes } @classmethod @@ -152,6 +155,7 @@ def make_from_dict(cls, payload): utterance.annotations = payload['annotations'] utterance.date_time = payload['date_time'] utterance.hypotheses = payload['hypotheses'] + utterance.attributes = payload['attributes'] utterance.user = payload['user'] utterance.save() return utterance From b8b1a7b1e5db974443d9143ec0b6cbda6b79aff9 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Wed, 16 Oct 2019 15:05:04 +0300 Subject: [PATCH 130/133] fix: pop message_attrs from kwargs, improve styl --- core/agent.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/core/agent.py b/core/agent.py index 08426530..a8ce35c3 100644 --- a/core/agent.py +++ b/core/agent.py @@ -76,7 +76,7 @@ def process_service_response(self, dialog_id: str, service_name: str = None, res service.state_processor_method(dialog=workflow_record['dialog'], dialog_object=workflow_record['dialog_object'], payload=response, - message_attrs=kwargs.get('message_attrs', {})) + message_attrs=kwargs.pop('message_attrs', {})) # passing kwargs to services record if not set(service_data.keys()).intersection(set(kwargs.keys())): @@ -117,20 +117,22 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, user = self.state_manager.get_or_create_user(user_telegram_id, user_device_type) should_reset = True if utterance == TG_START_UTT else False dialog = self.state_manager.get_or_create_dialog(user, location, channel_type, should_reset=should_reset) + dialog_id = str(dialog.id) + service_name = 'input' + message_attrs = kwargs.pop('message_attrs', {}) + if require_response: event = asyncio.Event() kwargs['event'] = event self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, hold_flush=True, **kwargs) - self.register_service_request(str(dialog.id), 'input') - await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance, - message_attrs=kwargs.get('message_attrs', {})) + self.register_service_request(dialog_id, service_name) + await self.process(dialog_id, service_name, response=utterance, 
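HumanUtterance gains an attributes field, and the dict-based pipeline fills it from message_attrs. A plain-dict sketch of the resulting utterance record; the real schema lives in core/state_schema.py as mongoengine fields, and new_human_utterance_dict is only an illustration.

.. code:: python

    # Plain-dict view of a human utterance record after this change.
    from datetime import datetime
    from typing import Any, Dict, Optional


    def new_human_utterance_dict(text: str, user: Dict[str, Any],
                                 message_attrs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        return {
            'id': None,
            'text': text,
            'user': user,
            'annotations': {},
            'date_time': str(datetime.now()),
            'hypotheses': [],
            'attributes': message_attrs or {},  # new field carrying extra request keys
        }


    utt = new_human_utterance_dict('hello', {'user_telegram_id': 'xyz'},
                                   {'my_custom_dialog_id': 111})
    assert utt['attributes']['my_custom_dialog_id'] == 111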
message_attrs=message_attrs) await event.wait() - return self.flush_record(str(dialog.id)) + return self.flush_record(dialog_id) else: self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) - self.register_service_request(str(dialog.id), 'input') - await self.process(dialog_id=str(dialog.id), service_name='input', response=utterance, - message_attrs=kwargs.get('message_attrs', {})) + self.register_service_request(dialog_id, service_name) + await self.process(dialog_id, service_name, response=utterance, message_attrs=message_attrs) async def process(self, dialog_id, service_name=None, response: Any = None, **kwargs): workflow_record = self.get_workflow_record(dialog_id) From 6eaddf86f87c6e1cb3eeadf6678de0c9b29735e8 Mon Sep 17 00:00:00 2001 From: ignatov Date: Wed, 16 Oct 2019 15:54:37 +0300 Subject: [PATCH 131/133] feat: added tfhub volume --- docker-compose.yml | 1 + dp/dockerfile_skill_cpu | 3 +++ dp/dockerfile_skill_gpu | 3 +++ 3 files changed, 7 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index e4f33a67..6a5d1280 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -120,4 +120,5 @@ services: - .:/dp-agent - ${EXTERNAL_FOLDER}/dp_logs:/logs - ${EXTERNAL_FOLDER}/.deeppavlov:/root/.deeppavlov + - ${EXTERNAL_FOLDER}/tfhub:/tmp/tfhub version: '3.7' diff --git a/dp/dockerfile_skill_cpu b/dp/dockerfile_skill_cpu index 195b748c..1f0abbd5 100644 --- a/dp/dockerfile_skill_cpu +++ b/dp/dockerfile_skill_cpu @@ -9,6 +9,9 @@ ENV CONFIG=$skillconfig ENV PORT=$skillport ENV HOST=$skillhost ENV ENDPOINT=$skill_endpoint +ENV TFHUB_CACHE_DIR=/tmp/tfhub + +VOLUME /tmp/tfhub RUN mkdir dp-agent WORKDIR /dp-agent diff --git a/dp/dockerfile_skill_gpu b/dp/dockerfile_skill_gpu index 72e2e83d..d8b75061 100644 --- a/dp/dockerfile_skill_gpu +++ b/dp/dockerfile_skill_gpu @@ -9,6 +9,9 @@ ENV CONFIG=$skillconfig ENV PORT=$skillport ENV HOST=$skillhost ENV ENDPOINT=$skill_endpoint +ENV TFHUB_CACHE_DIR=/tmp/tfhub + +VOLUME /tmp/tfhub RUN mkdir dp-agent WORKDIR /dp-agent From d0f7b65b1ba513e77685c452f8eb367d6c871df4 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Wed, 16 Oct 2019 16:15:21 +0300 Subject: [PATCH 132/133] style: minor improvements --- core/agent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/agent.py b/core/agent.py index a8ce35c3..8bf0295b 100644 --- a/core/agent.py +++ b/core/agent.py @@ -2,7 +2,7 @@ from collections import defaultdict from time import time -from typing import Any, Optional, Callable, Hashable, Dict +from typing import Any, Optional, Callable, Hashable from core.pipeline import Pipeline from core.state_manager import StateManager @@ -129,10 +129,10 @@ async def register_msg(self, utterance: str, user_telegram_id: Hashable, await self.process(dialog_id, service_name, response=utterance, message_attrs=message_attrs) await event.wait() return self.flush_record(dialog_id) - else: - self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) - self.register_service_request(dialog_id, service_name) - await self.process(dialog_id, service_name, response=utterance, message_attrs=message_attrs) + + self.add_workflow_record(dialog=dialog, deadline_timestamp=deadline_timestamp, **kwargs) + self.register_service_request(dialog_id, service_name) + await self.process(dialog_id, service_name, response=utterance, message_attrs=message_attrs) async def process(self, dialog_id, service_name=None, response: Any = None, **kwargs): workflow_record = 
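When require_response is set, register_msg stores an asyncio.Event in the workflow record, starts processing, waits on the event and then flushes the record back to the caller. A self-contained sketch of that wait-and-flush pattern, with a fake pipeline standing in for the real service calls.

.. code:: python

    # The caller is parked on an asyncio.Event until a response is ready,
    # then the per-dialog record is flushed and returned.
    import asyncio

    workflow = {}


    async def register(dialog_id: str, utterance: str) -> dict:
        event = asyncio.Event()
        workflow[dialog_id] = {'utterance': utterance, 'event': event, 'response': None}
        asyncio.create_task(fake_pipeline(dialog_id))
        await event.wait()              # parked until the pipeline finishes
        return workflow.pop(dialog_id)  # flush the record to the caller


    async def fake_pipeline(dialog_id: str) -> None:
        await asyncio.sleep(0.01)       # stand-in for the real service round trips
        record = workflow[dialog_id]
        record['response'] = record['utterance'].upper()
        record['event'].set()


    print(asyncio.run(register('dialog-1', 'hello'))['response'])  # -> HELLO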
self.get_workflow_record(dialog_id) From bea2408a3a5d923aa8572d3d7aa392b39ed009e6 Mon Sep 17 00:00:00 2001 From: Olga Gureenkova Date: Thu, 17 Oct 2019 14:13:02 +0300 Subject: [PATCH 133/133] docs: update according to state api v.0.12.1, http server docs --- core/__init__.py | 2 +- docs/source/_static/api.html | 2 +- docs/source/_static/apispec/agent_v0.12.1.yml | 414 ++++++++++++++++++ docs/source/intro/overview.rst | 38 +- 4 files changed, 445 insertions(+), 11 deletions(-) create mode 100644 docs/source/_static/apispec/agent_v0.12.1.yml diff --git a/core/__init__.py b/core/__init__.py index facd58b8..30d88a8a 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -1,5 +1,5 @@ from core.log import init_logger -STATE_API_VERSION = "0.12.0" +STATE_API_VERSION = "0.12.1" init_logger() diff --git a/docs/source/_static/api.html b/docs/source/_static/api.html index 427c9941..76da02d8 100644 --- a/docs/source/_static/api.html +++ b/docs/source/_static/api.html @@ -13,7 +13,7 @@ - + \ No newline at end of file diff --git a/docs/source/_static/apispec/agent_v0.12.1.yml b/docs/source/_static/apispec/agent_v0.12.1.yml new file mode 100644 index 00000000..04096055 --- /dev/null +++ b/docs/source/_static/apispec/agent_v0.12.1.yml @@ -0,0 +1,414 @@ +openapi: 3.0.1 +info: + title: DeepPavlov Agent State API + version: 0.12.1 + description: >- + Agents built with DeepPavlov Agent communicate with their Services via HTTP, so + endpoints should be specified. +servers: + - url: 'http://localhost:{port}/' + description: Local development server + variables: + port: + default: '4242' +paths: + /: + get: + summary: Root path + responses: + '200': + description: Go to /apidocs/ to see graphical web UI for this API. + '/api/v0/{skill_endpoint}/': + post: + parameters: + - name: skill_endpoint + in: path + required: true + schema: + enum: + - model + summary: Generic skill endpoint + description: >- + An agent built with DeepPavlov Agent sends requests to the services endpoints in + order to retrieve the answers. + requestBody: + description: Description of the request to be executed + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RequestBodySchema' + examples: + general: + $ref: '#/components/examples/GenericRequestBody' + responses: + '200': + description: Request finished succesfully. + content: + application/json: + schema: + $ref: "#/components/schemas/ODQAResponse200Schema" + examples: + odqa: + $ref: "#/components/examples/ODQAResponse" + '404': + description: This skill doesn't exsits. +components: + schemas: + RequestBodySchema: + type: object + properties: + id: + description: REQUIRED. A unique id of the dialog. + type: string + location: + description: 'REQUIRED. A free-formatted location where the dialog is happening.' + type: string + utterances: + description: >- + REQUIRED. A list of all utterances of the dialog. The last utterance always belongs + to a human user. + type: array + items: + oneOf: + - $ref: '#/components/schemas/HumanUtterance' + - $ref: '#/components/schemas/BotUtterance' + human: + $ref: '#/components/schemas/Human' + bot: + $ref: '#/components/schemas/Bot' + channel_type: + description: >- + REQUIRED. A channel where the communication is happening. For example, "telegram", + "facebook", "http". + type: string + Human: + description: 'REQUIRED. A human user in the dialog.' + type: object + properties: + id: + description: REQUIRED. A unique is of the human user. + type: string + user_telegram_id: + description: REQUIRED. 
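The spec documents a generic skill endpoint that receives the dialog state and returns hypotheses. A sketch of calling such an endpoint with requests; the body is only a small subset of the documented dialog fields, and the URL follows the spec's local development server default, so both are assumptions rather than a working call.

.. code:: python

    # POST a minimal dialog state to a skill endpoint described by the spec.
    import requests

    dialog_state = {
        'id': '5d9b755eb8cd280022907f27',
        'location': 'lab',
        'channel_type': 'http',
        'utterances': [{'text': 'Hello', 'annotations': {}, 'hypotheses': [], 'attributes': {}}],
    }

    resp = requests.post('http://localhost:4242/api/v0/model/', json=dialog_state, timeout=5)
    resp.raise_for_status()
    # Expected shape per the spec: {'responses': [[{'text': ..., 'confidence': ...}, ...]]}
    print(resp.json())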
A unique Telegram id of the human user. + type: string + user_type: + description: REQUIRED. A user type. Here it is always “human”. + type: string + device_type: + description: >- + REQUIRED. A name of the device which is used by the user. For example, it can be "iphone" or "android". + type: string + persona: + description: REQUIRED. A persona of the human user. It is stored as an array of sentences characterizing the human user. By default this is an empty array. + type: array + items: + type: string + profile: + $ref: '#/components/schemas/Profile' + attributes: + description: Generic key-value attributes. + type: object + items: + type: object + Bot: + description: >- + REQUIRED. A bot user of the dialog. A bot is an agent with a particular skill set. + type: object + properties: + id: + description: REQUIRED. A unique is of the bot user. + type: string + user_type: + description: REQUIRED. A user type. Here it is always “human”. + type: string + persona: + description: REQUIRED. A persona of the bot user. It is stored as an array of sentences characterizing the human user. By default this is an empty array. + type: array + items: + type: string + attributes: + description: Generic key-value attributes. + type: object + items: + type: object + Profile: + description: REQUIRED. A personal information about the human user. + type: object + properties: + gender: + description: REQUIRED. A gender of the human user. + type: string + birthdate: + description: REQUIRED. Birthdate + type: string + format: date + name: + description: REQUIRED. A name of the human user. + type: string + location: + description: REQUIRED. A location of the human user. + type: object + home_coordinates: + description: REQUIRED. Home coordinates of the human user. + type: object + work_coordinates: + description: REQUIRED. Workplace coordinates of the human user. + type: object + occupation: + description: REQUIRED. A profession of the human user. + type: string + income_per_year: + description: REQUIRED. An income of the human user. + type: number + HumanUtterance: + description: RESUIRED. An utterance of the human user. + type: object + properties: + id: + type: string + description: REQUIRED. A unique id of the human utterance. + text: + type: string + description: >- + REQUIRED. Text of the human utterance. If this is the very first utterance of the dialog, + it has the "/start" value. + user: + $ref: '#/components/schemas/Human' + annotations: + $ref: '#/components/schemas/Annotations' + date_time: + type: string + format: datetime + description: REQUIRED. A time of the utterance receiving by the agent server. + hypotheses: + type: array + items: + type: object + description: >- + Response candidates to this particular Utterance, generated by Skills. + attributes: + description: Generic key-value attributes. + type: object + items: + type: object + BotUtterance: + description: RESUIRED. An utterance of the bot user. + type: object + properties: + id: + type: string + description: REQUIRED. A unique id of the bot utterance. + text: + type: string + description: >- + REQUIRED. Text of the bot utterance. + orig_text: + type: string + description: >- + An original reponse given by the skill which can be transformed later by ResponseSelector + or Postprocessor. If it was transformed, the transformed response goes to the "text" field + and the original response is stored to the "orig_text" field. The field has value None by default. 
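A tiny, illustrative check (not part of the codebase) that an incoming human utterance dict carries the keys documented above, including the new attributes field.

.. code:: python

    # Compare an utterance dict against the documented HumanUtterance keys.
    REQUIRED_HUMAN_UTTERANCE_KEYS = {
        'id', 'text', 'user', 'annotations', 'date_time', 'hypotheses', 'attributes'}


    def missing_utterance_keys(utterance: dict) -> set:
        return REQUIRED_HUMAN_UTTERANCE_KEYS - utterance.keys()


    assert missing_utterance_keys({'id': '1', 'text': 'Hello', 'user': {},
                                   'annotations': {}, 'date_time': '', 'hypotheses': [],
                                   'attributes': {}}) == set()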
+ user: + $ref: '#/components/schemas/Bot' + annotations: + $ref: '#/components/schemas/Annotations' + date_time: + type: string + format: datetime + description: REQUIRED. A time of the utterance receiving by the agent server. + confidence: + type: number + description: Skill confidence in its response. + active_skill: + type: string + description: >- + A name of the skill which was responsible for the final bot response generation. + Annotations: + description: >- + REQUIRED. The utterances annotations, or tags. The default values of the field is an empty array: []. If the dialog starts with "/start" utterance, this utterance is not being annotated. + type: object + ODQAResponse200Unit: + description: >- + A list of skill responses. Each response here is a hypothetical response to the same human utterance. So s skill should generate a number of possible reponses for each incoming human utterance. + type: array + items: + type: object + properties: + text: + description: A text reponse of the skill. + type: string + confidence: + description: >- + Skill confidence in its reponse. + type: number + ODQAResponse200Schema: + description: >- + A batch of lists or skill responses. A skill should provide a list of hypothetical answers for each incoming human utterance. + properties: + responses: + type: array + items: + $ref: '#/components/schemas/ODQAResponse200Unit' + examples: + GenericRequestBody: + description: one exaustive example + value: + id: 5d9b755eb8cd280022907f27 + location: lab + utterances: + - id: 5d9b755eb8cd280022907f29 + text: Hello + user: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: vasily + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + annotations: + ner: + tokens: + - Hello + tags: + - O + date_time: '2019-10-07 20:26:54.409000' + hypotheses: + - skill_name: chitchat + text: Hi! + confidence: 0.6 + - skill_name: odqa + text: to my friends + confidence: 0.23 + - id: 5d9b755eb8cd280022907f28 + active_skill: chitchat + confidence: 0.6 + text: Hi! + orig_text: None + user: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + annotations: + bot_ner: + tokens: + - Hi + - '!' + tags: + - O + - O + date_time: '2019-10-07 20:26:54.856000' + - id: 5d9b7565b8cd280022907f2b + text: What is your name? + user: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: к5698 + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + annotations: + ner: + tokens: + - What + - is + - your + - name + - '?' + tags: + - O + - O + - O + - O + - O + date_time: '2019-10-07 20:27:01.193000' + hypotheses: + - skill_name: chitchat + text: My name is DeepPavlov Agent! + confidence: 0.9 + - skill_name: odqa + text: Alexander the Great + confidence: 0.5 + - id: 5d9b7565b8cd280022907f2a + active_skill: chitchat + confidence: 0.6 + text: My name is DeepPavlov Agent! + orig_text: None + user: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + annotations: + bot_ner: + tokens: + - My + - name + - is + - DeepPavlov + - Agent + - '!' 
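On the skill side, hypotheses have to be packed into the batched shape shown in ODQAResponse200Schema. A sketch of such a formatter; build_skill_response is an illustrative helper, not an existing function.

.. code:: python

    # Pack per-dialog hypotheses into {'responses': [[{'text', 'confidence'}, ...], ...]}.
    from typing import Dict, List, Tuple


    def build_skill_response(batch: List[List[Tuple[str, float]]]) -> Dict[str, list]:
        return {
            'responses': [
                [{'text': text, 'confidence': confidence} for text, confidence in hypotheses]
                for hypotheses in batch
            ]
        }


    print(build_skill_response(
        [[('Peter the Great was born at 1672.', 0.947), ('at 1672', 0.998)]]))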
+ tags: + - O + - O + - O + - O + - O + - O + date_time: '2019-10-07 20:27:01.367000' + channel_type: cmd_client + human: + id: 5d9b755eb8cd280022907f25 + user_telegram_id: к5698 + user_type: human + device_type: cmd + persona: [] + profile: + name: None + gender: None + birthdate: None + location: None + home_coordinates: None + work_coordinates: None + occupation: None + income_per_year: None + attributes: {} + bot: + id: 5d9b755eb8cd280022907f26 + user_type: bot + persona: [] + attributes: {} + version: 0.12.0 + ODQAResponse: + description: An example of Open Domain Question Answering (ODQA) skill. + value: + responses: + - + - text: Peter the Great was born at 1672. + confidence: 0.947 + - text: at 1672 + confidence: 0.998 + - + - text: The Earth population is 7 billions. + confidence: 0.3333 + - text: 7 billions + confidence: 0.36 \ No newline at end of file diff --git a/docs/source/intro/overview.rst b/docs/source/intro/overview.rst index 49aebd57..c4e00358 100644 --- a/docs/source/intro/overview.rst +++ b/docs/source/intro/overview.rst @@ -192,6 +192,7 @@ Running the Agent Agent can run both from container and from a local machine. The default Agent port is **4242**. **Container** +------------- 1. Connect to agent's container: @@ -208,6 +209,7 @@ Agent can run both from container and from a local machine. The default Agent po python -m core.run **Local machine** +----------------- 1. (optional) Please consider setting your locale according your input language to avoid decoding errors while communicating agent via command line. For example: @@ -243,8 +245,9 @@ Agent can run both from container and from a local machine. The default Agent po python -m core.run -ch telegram **HTTP api server** +------------------- -1. You can run agent api server from both container and local environment: +1. **Run the agent api server from both container and local environment** .. code:: bash @@ -252,7 +255,7 @@ Agent can run both from container and from a local machine. The default Agent po In both cases api will be accessible on your localhost -2. Web server accepts POST requests with application/json content-type: +2. **Web server accepts POST requests with application/json content-type** Request should be in form: @@ -283,17 +286,34 @@ Agent can run both from container and from a local machine. The default Agent po In case of wrong format, HTTP errors will be returned. - If you need the Agent server to return something different than ``user_id`` and ``reponse``, try the - :ref:`output formatters `. +3. **Arbitrary input format of the Agent Server** -3. In addition to everything else the HTTP api server allows viewing dialogs in the database through GET requests. - The result is returned in json format which can be easily prettifyed with various browser extensions. + If you want to pass anything except + ``user_id`` and ``payload``, just pass it as an additional key-value item, for example: + + .. code:: bash + + curl --header "Content-Type: application/json" \ + --request POST \ + --data '{"user_id":"xyz","payload":"hello", "my_custom_dialog_id": 111}' \ + http://localhost:4242 + + All additional items will be stored into the ``attributes`` field of a ``HumanUtterance``. + +4. **Modify the default response format of the Agent server** + + If you need the Agent server to return something different than ``user_id`` and ``reponse``, try the + :ref:`output formatters `. + +5. 
**View dialogs in the database through GET requests**
+
+   The result is returned in json format which can be easily prettified with various browser extensions.
    Three main web pages are provided (examples are shown for the case when agent is running on http://localhost:4242):
 
-   * http://localhost:4242/dialogs - provides list of all dialogs (without utterances)
-   * http://localhost:4242/dialogs/all - provides list of all dialogs (with utterances)
-   * http://localhost:4242/dialogs/<dialog_id> - provides exact dialog (dialog_id can be seen on /dialogs page)
+   * http://localhost:4242/dialogs - provides list of all dialogs (without utterances)
+   * http://localhost:4242/dialogs/all - provides list of all dialogs (with utterances)
+   * http://localhost:4242/dialogs/<dialog_id> - provides exact dialog (dialog_id can be seen on /dialogs page)
 
 Analyzing the data
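A requests-based counterpart to the curl and GET examples above: extra JSON keys travel into HumanUtterance.attributes, and the stored dialogs can then be inspected through the documented GET endpoints. The sketch assumes the agent HTTP server is running on its default port.

.. code:: python

    # POST a message with an extra key, then browse the stored dialogs.
    import requests

    AGENT = 'http://localhost:4242'

    reply = requests.post(AGENT,
                          json={'user_id': 'xyz', 'payload': 'hello',
                                'my_custom_dialog_id': 111},
                          timeout=10)
    print(reply.json())  # default response shape: user_id plus the bot response

    print(requests.get(f'{AGENT}/dialogs', timeout=10).json())      # dialogs without utterances
    print(requests.get(f'{AGENT}/dialogs/all', timeout=10).json())  # dialogs with utterances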