Skip to content

Commit

Permalink
Merge pull request #418 from hellohaptik/removed_unused_crf_code
Browse files Browse the repository at this point in the history
Removed unused crf code
  • Loading branch information
chiragjn authored Sep 21, 2021
2 parents afc39f7 + 4bf32e8 commit 72a48c8
Show file tree
Hide file tree
Showing 37 changed files with 32 additions and 2,325 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ __pycache__/

# haptik ner config file
config
model_config

# C extensions
# C Extensions
*.so

# Distribution / packaging
Expand Down
30 changes: 0 additions & 30 deletions chatbot_ner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
import logging.handlers
import os

import dotenv
from elasticsearch import RequestsHttpConnection
from requests_aws4auth import AWS4Auth

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
MODEL_CONFIG_PATH = os.path.join(BASE_DIR, 'model_config')

LOG_PATH = os.path.join(BASE_DIR, 'logs')

# TODO: Set this up via Django LOGGING
Expand Down Expand Up @@ -116,34 +113,7 @@
ner_logger.warning('`ES_AWS_SERVICE` and `ES_AWS_REGION` are not set. '
'This is not a problem if you are using self hosted ES')

# TODO: Remove non functional crf code and cleanup
# Model Vars
# Crf Model Specific (Mandatory to use CRF Model)
CRF_MODELS_PATH = os.environ.get('MODELS_PATH')
CRF_EMBEDDINGS_PATH_VOCAB = os.environ.get('EMBEDDINGS_PATH_VOCAB')
CRF_EMBEDDINGS_PATH_VECTORS = os.environ.get('EMBEDDINGS_PATH_VECTORS')

if os.path.exists(MODEL_CONFIG_PATH):
dotenv.read_dotenv(MODEL_CONFIG_PATH)
else:
ner_logger.warning('Warning: no file named "model_config" found at %s. This is not a problem if you '
'dont want to run NER with ML models', MODEL_CONFIG_PATH)

CITY_MODEL_TYPE = os.environ.get('CITY_MODEL_TYPE')
CITY_MODEL_PATH = os.environ.get('CITY_MODEL_PATH')
DATE_MODEL_TYPE = os.environ.get('DATE_MODEL_TYPE')
DATE_MODEL_PATH = os.environ.get('DATE_MODEL_PATH')
if not CITY_MODEL_PATH:
CITY_MODEL_PATH = os.path.join(BASE_DIR, 'data', 'models', 'crf', 'city', 'model_13062017.crf')
if not DATE_MODEL_PATH:
DATE_MODEL_PATH = os.path.join(BASE_DIR, 'data', 'models', 'crf', 'date', 'model_date.crf')

# Crf Model Specific with additional AWS storage (optional)
CRF_MODEL_S3_BUCKET_NAME = os.environ.get('CRF_MODEL_S3_BUCKET_NAME')
CRF_MODEL_S3_BUCKET_REGION = os.environ.get('CRF_MODEL_S3_BUCKET_REGION')
WORD_EMBEDDING_REMOTE_URL = os.environ.get('WORD_EMBEDDING_REMOTE_URL')
GOOGLE_TRANSLATE_API_KEY = os.environ.get('GOOGLE_TRANSLATE_API_KEY')

if not GOOGLE_TRANSLATE_API_KEY:
ner_logger.warning('Google Translate API key is null or not set')
GOOGLE_TRANSLATE_API_KEY = ''
3 changes: 1 addition & 2 deletions chatbot_ner/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
'datastore',
'ner_v1',
'ner_v2',
'models',
'django_nose'
]

Expand Down Expand Up @@ -137,7 +136,7 @@ def __getitem__(self, item):
'--ignore-files=constants.py',
'--ignore-files=run_postman_tests.py',
'--cover-erase',
'--cover-package=datastore,external_api,language_utilities,lib,models,ner_v1,ner_v2',
'--cover-package=datastore,external_api,language_utilities,lib,ner_v1,ner_v2',
'--cover-inclusive',
]

Expand Down
3 changes: 0 additions & 3 deletions chatbot_ner/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@
url(r'^entities/get_crf_training_data', external_api.get_crf_training_data),
url(r'^entities/update_crf_training_data', external_api.update_crf_training_data),

# Deprecated train crf model
url(r'^entities/train_crf_model', external_api.train_crf_model),

url(r'^entities/languages/v1/(?P<entity_name>.+)$', external_api.entity_language_view),
url(r'^entities/data/v1/(?P<entity_name>.+)$', external_api.entity_data_view),

Expand Down
11 changes: 0 additions & 11 deletions config.example
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,3 @@ DESTINATION_PORT=
# To enable entity detection for multiple languages, we use Google Translate. Please enter the API key (optional)
GOOGLE_TRANSLATE_API_KEY=

# Deprecated CRF models configuration
MODELS_PATH=
WORD_EMBEDDING_REMOTE_URL=
EMBEDDINGS_PATH_VECTORS=
EMBEDDINGS_PATH_VOCAB=
CITY_MODEL_PATH=
CITY_MODEL_TYPE=crf
CRF_MODEL_S3_BUCKET_NAME=
CRF_MODEL_S3_BUCKET_REGION=
DATE_MODEL_PATH=
DATE_MODEL_TYPE=
39 changes: 0 additions & 39 deletions docker/Dockerfile

This file was deleted.

8 changes: 2 additions & 6 deletions docker/Dockerfile-python3
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# This automates the chatbot_ner installation

FROM python:3.6.10

RUN apt-get update && apt-get install -y wget build-essential curl nginx supervisor
Expand All @@ -15,11 +13,9 @@ COPY docker/install.sh nltk_setup.py datastore_setup.py /app/
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# cython is installed because pandas build fails otherwise
RUN mkdir -p ~/model_lib && \
mkdir -p /root/models && \
/app/install.sh && \
RUN /app/install.sh && \
touch /app/config && \
touch /app/model_config && \
pip install -U pip && \
pip install --no-cache-dir -I uwsgi && \
pip install cython

Expand Down
16 changes: 0 additions & 16 deletions docker/install.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
#!/bin/bash
mkdir -p ~/model_lib
cd /tmp/
wget ftp://ftp.netbsd.org/pub/pkgsrc/distfiles/CRF++-0.58.tar.gz
tar -xzf CRF++-0.58.tar.gz -C ~/model_lib/
cd ~/model_lib/CRF++-0.58/
./configure
make
make install

echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc

cd python
python setup.py build
python setup.py install

# Get sample Nginx file for routing and GUI

cd /tmp
wget "https://s3-us-west-2.amazonaws.com/chatbotner/chatbot_ner_nginx/default"
bash -c "cat /tmp/default > /etc/nginx/sites-available/default"
Expand Down
58 changes: 1 addition & 57 deletions external_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
FetchIndexForAliasException, DeleteIndexFromAliasException
from chatbot_ner.config import ner_logger
from external_api.constants import ENTITY_DATA, ENTITY_NAME, LANGUAGE_SCRIPT, ENTITY_LIST, \
EXTERNAL_API_DATA, SENTENCE_LIST, READ_MODEL_FROM_S3, ES_CONFIG, READ_EMBEDDINGS_FROM_REMOTE_URL, \
LIVE_CRF_MODEL_PATH, SENTENCES, LANGUAGES
EXTERNAL_API_DATA, SENTENCES, LANGUAGES

from django.views.decorators.csrf import csrf_exempt
from models.crf_v2.crf_train import CrfTrain

from external_api.lib import dictionary_utils
from external_api.response_utils import external_api_response_wrapper
Expand Down Expand Up @@ -210,60 +208,6 @@ def update_crf_training_data(request):
return HttpResponse(json.dumps(response), content_type='application/json', status=200)


@csrf_exempt
def train_crf_model(request):
"""
Train a CRF model for an entity and report the path of the trained model.

The JSON payload is read from the POST field named by EXTERNAL_API_DATA. A CrfTrain
instance is built from it; when "es_config" is truthy the model is trained via
train_model_from_es_data(), otherwise via train_crf_model_from_list() using the
"sentence_list" and "entity_list" values from the same payload.

Args:
request: Django HTTP request; the payload is read from request.POST
Returns:
HttpResponse : JSON body with the keys "success", "error" and "result".
On success (status 200) "result" maps LIVE_CRF_MODEL_PATH to the trained model path.
On any exception (status 500) "error" holds the exception message.
Post Request Body:
key: "external_api_data"
value: {
"entity_name": "crf_test",
"read_model_from_s3": true,
"es_config": true,
"read_embeddings_from_remote_url": true
}
When "es_config" is false/absent, "sentence_list" and "entity_list" are also read from the value.
"""
# Response envelope; serialized as JSON on both the success and the error paths
response = {"success": False, "error": "", "result": {}}
try:
# json.loads raises if the POST field is missing or is not valid JSON;
# that is reported as a 500 by the catch-all handler below
external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
entity_name = external_api_data.get(ENTITY_NAME)
read_model_from_s3 = external_api_data.get(READ_MODEL_FROM_S3)
es_config = external_api_data.get(ES_CONFIG)
read_embeddings_from_remote_url = external_api_data.get(READ_EMBEDDINGS_FROM_REMOTE_URL)
crf_model = CrfTrain(entity_name=entity_name,
read_model_from_s3=read_model_from_s3,
read_embeddings_from_remote_url=read_embeddings_from_remote_url)

# Choose the training data source based on the es_config flag
if es_config:
model_path = crf_model.train_model_from_es_data()
else:
# Training data is supplied inline in the request payload
sentence_list = external_api_data.get(SENTENCE_LIST)
entity_list = external_api_data.get(ENTITY_LIST)
model_path = crf_model.train_crf_model_from_list(sentence_list=sentence_list, entity_list=entity_list)

response['result'] = {LIVE_CRF_MODEL_PATH: model_path}
response['success'] = True

# Datastore-related failures: logged with traceback and returned as a 500
except (IndexNotFoundException, InvalidESURLException,
SourceDestinationSimilarException, InternalBackupException, AliasNotFoundException,
PointIndexToAliasException, FetchIndexForAliasException, DeleteIndexFromAliasException,
AliasForTransferException, IndexForTransferException, NonESEngineTransferException) as error_message:
response['error'] = str(error_message)
ner_logger.exception('Error: %s' % error_message)
return HttpResponse(json.dumps(response), content_type='application/json', status=500)

# NOTE(review): this catch-all performs the same handling as the handler above
except Exception as e:
response['error'] = str(e)
ner_logger.exception('Error: %s' % e)
return HttpResponse(json.dumps(response), content_type='application/json', status=500)

return HttpResponse(json.dumps(response), content_type='application/json', status=200)


@csrf_exempt
@external_api_response_wrapper
def entity_language_view(request, entity_name):
Expand Down
5 changes: 0 additions & 5 deletions external_api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,4 @@
LANGUAGE_SCRIPT = 'language_script'
ENTITY_LIST = 'entity_list'
SENTENCE_LIST = 'sentence_list'


READ_MODEL_FROM_S3 = 'read_model_from_s3'
ES_CONFIG = 'es_config'
READ_EMBEDDINGS_FROM_REMOTE_URL = 'read_embeddings_from_remote_url'
LIVE_CRF_MODEL_PATH = 'live_crf_model_path'
78 changes: 0 additions & 78 deletions lib/aws_utils.py

This file was deleted.

9 changes: 0 additions & 9 deletions model_config.example

This file was deleted.

Empty file removed models/__init__.py
Empty file.
Empty file removed models/crf/__init__.py
Empty file.
Loading

0 comments on commit 72a48c8

Please sign in to comment.