Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop to Master: September 19 #441

Merged
merged 14 commits into master from develop
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ __pycache__/

# haptik ner config file
config
model_config

# C extensions
# C Extensions
*.so

# Distribution / packaging
Expand Down
30 changes: 0 additions & 30 deletions chatbot_ner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
import logging.handlers
import os

import dotenv
from elasticsearch import RequestsHttpConnection
from requests_aws4auth import AWS4Auth

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
MODEL_CONFIG_PATH = os.path.join(BASE_DIR, 'model_config')

LOG_PATH = os.path.join(BASE_DIR, 'logs')

# TODO: Set this up via Django LOGGING
Expand Down Expand Up @@ -116,34 +113,7 @@
ner_logger.warning('`ES_AWS_SERVICE` and `ES_AWS_REGION` are not set. '
'This is not a problem if you are using self hosted ES')

# TODO: Remove non functional crf code and cleanup
# Model Vars
# Crf Model Specific (Mandatory to use CRF Model)
CRF_MODELS_PATH = os.environ.get('MODELS_PATH')
CRF_EMBEDDINGS_PATH_VOCAB = os.environ.get('EMBEDDINGS_PATH_VOCAB')
CRF_EMBEDDINGS_PATH_VECTORS = os.environ.get('EMBEDDINGS_PATH_VECTORS')

if os.path.exists(MODEL_CONFIG_PATH):
dotenv.read_dotenv(MODEL_CONFIG_PATH)
else:
ner_logger.warning('Warning: no file named "model_config" found at %s. This is not a problem if you '
'dont want to run NER with ML models', MODEL_CONFIG_PATH)

CITY_MODEL_TYPE = os.environ.get('CITY_MODEL_TYPE')
CITY_MODEL_PATH = os.environ.get('CITY_MODEL_PATH')
DATE_MODEL_TYPE = os.environ.get('DATE_MODEL_TYPE')
DATE_MODEL_PATH = os.environ.get('DATE_MODEL_PATH')
if not CITY_MODEL_PATH:
CITY_MODEL_PATH = os.path.join(BASE_DIR, 'data', 'models', 'crf', 'city', 'model_13062017.crf')
if not DATE_MODEL_PATH:
DATE_MODEL_PATH = os.path.join(BASE_DIR, 'data', 'models', 'crf', 'date', 'model_date.crf')

# Crf Model Specific with additional AWS storage (optional)
CRF_MODEL_S3_BUCKET_NAME = os.environ.get('CRF_MODEL_S3_BUCKET_NAME')
CRF_MODEL_S3_BUCKET_REGION = os.environ.get('CRF_MODEL_S3_BUCKET_REGION')
WORD_EMBEDDING_REMOTE_URL = os.environ.get('WORD_EMBEDDING_REMOTE_URL')
GOOGLE_TRANSLATE_API_KEY = os.environ.get('GOOGLE_TRANSLATE_API_KEY')

if not GOOGLE_TRANSLATE_API_KEY:
ner_logger.warning('Google Translate API key is null or not set')
GOOGLE_TRANSLATE_API_KEY = ''
3 changes: 1 addition & 2 deletions chatbot_ner/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
'datastore',
'ner_v1',
'ner_v2',
'models',
'django_nose'
]

Expand Down Expand Up @@ -137,7 +136,7 @@ def __getitem__(self, item):
'--ignore-files=constants.py',
'--ignore-files=run_postman_tests.py',
'--cover-erase',
'--cover-package=datastore,external_api,language_utilities,lib,models,ner_v1,ner_v2',
'--cover-package=datastore,external_api,language_utilities,lib,ner_v1,ner_v2',
'--cover-inclusive',
]

Expand Down
2 changes: 2 additions & 0 deletions chatbot_ner/setup_sentry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def setup_sentry():
from sentry_sdk.integrations.logging import LoggingIntegration

def before_sentry_send(event, hint):
if event.get('logger', None) == 'elasticapm.transport':
return None
event.setdefault("tags", {})["cas_name"] = CLIENT_APPLICATIONS_SETUP_NAME
return event

Expand Down
3 changes: 0 additions & 3 deletions chatbot_ner/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@
url(r'^entities/get_crf_training_data', external_api.get_crf_training_data),
url(r'^entities/update_crf_training_data', external_api.update_crf_training_data),

# Deprecated train crf model
url(r'^entities/train_crf_model', external_api.train_crf_model),

url(r'^entities/languages/v1/(?P<entity_name>.+)$', external_api.entity_language_view),
url(r'^entities/data/v1/(?P<entity_name>.+)$', external_api.entity_data_view),

Expand Down
11 changes: 0 additions & 11 deletions config.example
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,3 @@ DESTINATION_PORT=
# In order to enable entity detection for multiple languages, we use google translate. Please enter the key(optional)
GOOGLE_TRANSLATE_API_KEY=

# Deprecated CRF models configuration
MODELS_PATH=
WORD_EMBEDDING_REMOTE_URL=
EMBEDDINGS_PATH_VECTORS=
EMBEDDINGS_PATH_VOCAB=
CITY_MODEL_PATH=
CITY_MODEL_TYPE=crf
CRF_MODEL_S3_BUCKET_NAME=
CRF_MODEL_S3_BUCKET_REGION=
DATE_MODEL_PATH=
DATE_MODEL_TYPE=
39 changes: 0 additions & 39 deletions docker/Dockerfile

This file was deleted.

8 changes: 2 additions & 6 deletions docker/Dockerfile-python3
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# This is to automated chatbot_ner installation

FROM python:3.6.10

RUN apt-get update && apt-get install -y wget build-essential curl nginx supervisor
Expand All @@ -15,11 +13,9 @@ COPY docker/install.sh nltk_setup.py datastore_setup.py /app/
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# cython is installed because pandas build fails otherwise
RUN mkdir -p ~/model_lib && \
mkdir -p /root/models && \
/app/install.sh && \
RUN /app/install.sh && \
touch /app/config && \
touch /app/model_config && \
pip install -U pip && \
pip install --no-cache-dir -I uwsgi && \
pip install cython

Expand Down
16 changes: 0 additions & 16 deletions docker/install.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
#!/bin/bash
mkdir -p ~/model_lib
cd /tmp/
wget ftp://ftp.netbsd.org/pub/pkgsrc/distfiles/CRF++-0.58.tar.gz
tar -xzf CRF++-0.58.tar.gz -C ~/model_lib/
cd ~/model_lib/CRF++-0.58/
./configure
make
make install

echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc

cd python
python setup.py build
python setup.py install

# Get sample Nginx file for routing and GUI

cd /tmp
wget "https://s3-us-west-2.amazonaws.com/chatbotner/chatbot_ner_nginx/default"
bash -c "cat /tmp/default > /etc/nginx/sites-available/default"
Expand Down
58 changes: 1 addition & 57 deletions external_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
FetchIndexForAliasException, DeleteIndexFromAliasException
from chatbot_ner.config import ner_logger
from external_api.constants import ENTITY_DATA, ENTITY_NAME, LANGUAGE_SCRIPT, ENTITY_LIST, \
EXTERNAL_API_DATA, SENTENCE_LIST, READ_MODEL_FROM_S3, ES_CONFIG, READ_EMBEDDINGS_FROM_REMOTE_URL, \
LIVE_CRF_MODEL_PATH, SENTENCES, LANGUAGES
EXTERNAL_API_DATA, SENTENCES, LANGUAGES

from django.views.decorators.csrf import csrf_exempt
from models.crf_v2.crf_train import CrfTrain

from external_api.lib import dictionary_utils
from external_api.response_utils import external_api_response_wrapper
Expand Down Expand Up @@ -210,60 +208,6 @@ def update_crf_training_data(request):
return HttpResponse(json.dumps(response), content_type='application/json', status=200)


@csrf_exempt
def train_crf_model(request):
"""
This method is used to train crf model.
Args:
request (HttpResponse): HTTP response from url
Returns:
HttpResponse : HttpResponse with appropriate status and error message.
Post Request Body:
key: "external_api_data"
value: {
"entity_name": "crf_test",
"read_model_from_s3": true,
"es_config": true,
"read_embeddings_from_remote_url": true
}
"""
response = {"success": False, "error": "", "result": {}}
try:
external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
entity_name = external_api_data.get(ENTITY_NAME)
read_model_from_s3 = external_api_data.get(READ_MODEL_FROM_S3)
es_config = external_api_data.get(ES_CONFIG)
read_embeddings_from_remote_url = external_api_data.get(READ_EMBEDDINGS_FROM_REMOTE_URL)
crf_model = CrfTrain(entity_name=entity_name,
read_model_from_s3=read_model_from_s3,
read_embeddings_from_remote_url=read_embeddings_from_remote_url)

if es_config:
model_path = crf_model.train_model_from_es_data()
else:
sentence_list = external_api_data.get(SENTENCE_LIST)
entity_list = external_api_data.get(ENTITY_LIST)
model_path = crf_model.train_crf_model_from_list(sentence_list=sentence_list, entity_list=entity_list)

response['result'] = {LIVE_CRF_MODEL_PATH: model_path}
response['success'] = True

except (IndexNotFoundException, InvalidESURLException,
SourceDestinationSimilarException, InternalBackupException, AliasNotFoundException,
PointIndexToAliasException, FetchIndexForAliasException, DeleteIndexFromAliasException,
AliasForTransferException, IndexForTransferException, NonESEngineTransferException) as error_message:
response['error'] = str(error_message)
ner_logger.exception('Error: %s' % error_message)
return HttpResponse(json.dumps(response), content_type='application/json', status=500)

except Exception as e:
response['error'] = str(e)
ner_logger.exception('Error: %s' % e)
return HttpResponse(json.dumps(response), content_type='application/json', status=500)

return HttpResponse(json.dumps(response), content_type='application/json', status=200)


@csrf_exempt
@external_api_response_wrapper
def entity_language_view(request, entity_name):
Expand Down
5 changes: 0 additions & 5 deletions external_api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,4 @@
LANGUAGE_SCRIPT = 'language_script'
ENTITY_LIST = 'entity_list'
SENTENCE_LIST = 'sentence_list'


READ_MODEL_FROM_S3 = 'read_model_from_s3'
ES_CONFIG = 'es_config'
READ_EMBEDDINGS_FROM_REMOTE_URL = 'read_embeddings_from_remote_url'
LIVE_CRF_MODEL_PATH = 'live_crf_model_path'
78 changes: 0 additions & 78 deletions lib/aws_utils.py

This file was deleted.

9 changes: 0 additions & 9 deletions model_config.example

This file was deleted.

Empty file removed models/__init__.py
Empty file.
Empty file removed models/crf/__init__.py
Empty file.
Loading