Commit: 0.1.0 release
jonheng authored Aug 26, 2021
2 parents 44ae12a + f5cb31b commit 8cca183
Showing 148 changed files with 546 additions and 5,018 deletions.
2 changes: 2 additions & 0 deletions .gitlab-ci.yml
@@ -13,6 +13,7 @@ run_non_slow_unit_tests:
- apt-get -y install build-essential
- apt-get update
- pip install -e .
- pip install -r requirements_extra.txt
- pip install -U pytest
- python -m nltk.downloader punkt
- echo 'Execute not slow unit tests'
@@ -28,6 +29,7 @@ run_slow_unit_tests:
- apt-get -y install build-essential
- apt-get update
- pip install -e .
- pip install -r requirements_extra.txt
- pip install -U pytest
- python -m nltk.downloader punkt
- echo 'Execute not slow unit tests'
4 changes: 2 additions & 2 deletions README.md
@@ -5,7 +5,7 @@ Machine learning models from Singapore's natural language processing (NLP) resea
`sgnlp` is a Python package that allows you to easily get started with various NLP models implemented using the
PyTorch and Transformers frameworks.

We have an accompanying [demo site](https://sgnlp.aks.aisingapore.net/) where you can interact with our models and get a
We have an accompanying [demo site](https://sgnlp.aisingapore.net/) where you can interact with our models and get a
better understanding of how they work.

## Installation
@@ -18,7 +18,7 @@ pip install sgnlp

## Documentation

Visit our [documentation](https://sgnlp.aks.aisingapore.net/docs/) for tutorials.
Visit our [documentation](https://sgnlp.aisingapore.net/docs/) for tutorials.

## License

11 changes: 11 additions & 0 deletions demo_api/README.md
@@ -0,0 +1,11 @@
## Building, running, and pushing image
```
# From root folder of repository:
docker build -t <model_name> -f demo_api/<model_name>/Dockerfile demo_api/
docker run -p 8000:8000 <model_name>
# E.g.
docker build -t lsr -f demo_api/lsr/Dockerfile demo_api/
docker run -p 8000:8000 lsr
```
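
Once a container is running, the common routes introduced in this release give a quick way to sanity-check it. Below is a minimal sketch using the `requests` package (not part of this commit) against the `/healthz` and `/model-card` endpoints exposed on port 8000:

```
# Sketch only: quick sanity check against a demo container started with
# `docker run -p 8000:8000 <model_name>`; assumes the `requests` package is installed.
import requests

health = requests.get("http://localhost:8000/healthz")
print(health.status_code, health.json())  # expect 200 and {"healthy": true}

card = requests.get("http://localhost:8000/model-card")
print(card.json())  # model card JSON served by the shared /model-card route
```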
40 changes: 40 additions & 0 deletions demo_api/common.py
@@ -0,0 +1,40 @@
import logging
import json
from flask import Flask, jsonify, make_response


def create_api(app_name, model_card_path, model_usage_path="usage.py"):
    app = Flask(app_name)

    # setup gunicorn logging
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)

    # Common routes
    @app.route("/model-card", methods=["GET"])
    def get_model_card():
        """GET method for model card
        Returns:
            json: model card in json format
        """
        with open(model_card_path) as f:
            model_card = json.load(f)
        return jsonify(**model_card)

    @app.route("/model-usage", methods=["GET"])
    def get_model_usage():
        try:
            with open(model_usage_path) as f:
                model_usage = f.read()
            return jsonify(usage=model_usage)
        except FileNotFoundError:
            return make_response("Model usage not available.", 404)

    # Kubernetes health check endpoint
    @app.route("/healthz", methods=["GET"])
    def healthz():
        return make_response(jsonify({"healthy": True}), 200)

    return app
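
Each demo's api.py is now expected to build its Flask app through this helper, and gunicorn then serves that module via the `wsgi_app = "api:app"` setting in demo_api/gunicorn.conf.py. A minimal sketch of the pattern follows; the model card path and the predict body are placeholders, not code from this commit (the real calls appear in the emotion_entailment and lsr diffs further down):

```
# Sketch only: how a demo-specific api.py is expected to wire into create_api.
# "model_card/example_model.json" and the predict body are placeholders.
from flask import request

from demo_api.common import create_api

app = create_api(app_name=__name__, model_card_path="model_card/example_model.json")


@app.route("/predict", methods=["POST"])
def predict():
    req = request.get_json()
    # ... run the model on `req` and build a JSON-serialisable response ...
    return {"input_received": req}


if __name__ == "__main__":
    app.run()
```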
7 changes: 3 additions & 4 deletions demo_api/emotion_entailment/Dockerfile
@@ -1,11 +1,10 @@
FROM python:3.8-buster

COPY . /emotion_entailment
COPY . /demo_api

WORKDIR /emotion_entailment
WORKDIR /demo_api/emotion_entailment

RUN pip install -r requirements.txt

RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
12 changes: 0 additions & 12 deletions demo_api/emotion_entailment/README.md

This file was deleted.

demo_api/emotion_entailment/model_api.py → demo_api/emotion_entailment/api.py
@@ -1,9 +1,7 @@
import json
from flask import request, jsonify

import torch
import numpy as np
from flask import Flask, request, jsonify

from demo_api.common import create_api
from sgnlp.models.emotion_entailment import (
    RecconEmotionEntailmentConfig,
    RecconEmotionEntailmentTokenizer,
@@ -15,10 +13,7 @@
    get_all_evidence_utterance_from_conversation,
)


app = Flask(__name__)

model_card_path = "model_card/emotion_entailment.json"
app = create_api(app_name=__name__, model_card_path="model_card/emotion_entailment.json")

config = RecconEmotionEntailmentConfig.from_pretrained(
"https://sgnlp.blob.core.windows.net/models/reccon_emotion_entailment/config.json"
@@ -32,18 +27,6 @@
postprocessor = RecconEmotionEntailmentPostprocessor()


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)


@app.route("/predict", methods=["POST"])
def predict():
"""Iterate through each evidence utt in context to perform RECCON emotion entailment.
@@ -100,4 +83,4 @@ def predict():


if __name__ == "__main__":
app.run(host="0.0.0.0")
app.run()
2 changes: 1 addition & 1 deletion demo_api/emotion_entailment/requirements.txt
@@ -6,4 +6,4 @@ transformers==4.4.2
tokenizers==0.10.1
flask
gunicorn
sgnlp==0.0.1
sgnlp==0.1.0
4 changes: 0 additions & 4 deletions demo_api/emotion_entailment/wsgi.py

This file was deleted.

6 changes: 6 additions & 0 deletions demo_api/gunicorn.conf.py
@@ -0,0 +1,6 @@
bind = "0.0.0.0:8000"
wsgi_app = "api:app"
timeout = 180
workers = 4
preload_app = True
raw_env = ["PYTHONPATH=../../", "TOKENIZERS_PARALLELISM=false"]
6 changes: 3 additions & 3 deletions demo_api/lif_3way_ap/Dockerfile
@@ -1,11 +1,11 @@
FROM python:3.8-buster

COPY . /lif_3way_ap/
COPY . /demo_api

WORKDIR /lif_3way_ap
WORKDIR /demo_api/lif_3way_ap

RUN pip install -r requirements.txt
RUN python -m spacy download en_core_web_sm
RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
11 changes: 0 additions & 11 deletions demo_api/lif_3way_ap/README.md

This file was deleted.

26 changes: 6 additions & 20 deletions demo_api/lif_3way_ap/model_api.py → demo_api/lif_3way_ap/api.py
@@ -1,11 +1,12 @@
import json
import logging
from flask import Flask, request, jsonify
from flask import request
from transformers import cached_path

from demo_api.common import create_api
from sgnlp.models.lif_3way_ap import LIF3WayAPModel, LIF3WayAPConfig, LIF3WayAPPreprocessor
from transformers import cached_path

app = Flask(__name__)

app = create_api(app_name=__name__, model_card_path="model_card/lif_3way_ap.json")

gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
@@ -35,20 +36,5 @@ def predict():
return {"probability": output["label_probs"].item()}


model_card_path = "model_card/lif_3way_ap.json"


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)


if __name__ == '__main__':
    app.run(host='0.0.0.0')
    app.run()
4 changes: 0 additions & 4 deletions demo_api/lif_3way_ap/wsgi.py

This file was deleted.

7 changes: 3 additions & 4 deletions demo_api/lsr/Dockerfile
@@ -1,11 +1,10 @@
FROM python:3.8-buster

COPY . /lsr/
COPY . /demo_api

WORKDIR /lsr
WORKDIR /demo_api/lsr

RUN pip install -r requirements.txt

RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
11 changes: 0 additions & 11 deletions demo_api/lsr/README.md

This file was deleted.

34 changes: 7 additions & 27 deletions demo_api/lsr/model_api.py → demo_api/lsr/api.py
@@ -1,16 +1,11 @@
import json
import logging
from flask import Flask, request, jsonify
from flask import request
from transformers import cached_path

from demo_api.common import create_api
from sgnlp.models.lsr import LsrModel, LsrConfig, LsrPreprocessor, LsrPostprocessor
from text_input_to_docred_pipeline import TextInputToDocredPipeline

app = Flask(__name__)

gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
app.logger.setLevel(gunicorn_logger.level)
app = create_api(app_name=__name__, model_card_path="model_card/lsr.json")

# Download files from azure blob storage
rel2id_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/rel2id.json')
@@ -28,8 +23,8 @@
pred_threshold=PRED_THRESHOLD)

# Load model
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
model.eval()

app.logger.info('Preprocessing pipeline and model initialization complete.')
@@ -53,23 +48,8 @@ def predict():
    else:
        tensor_doc = preprocessor([docred_doc])
        output = model(**tensor_doc)
        return postprocessor(output.prediction[0], docred_doc)


model_card_path = "model_card/lsr.json"


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)
        return postprocessor(output.prediction, [docred_doc])[0]


if __name__ == '__main__':
    app.run(host='0.0.0.0')
    app.run()
4 changes: 2 additions & 2 deletions demo_api/lsr/download_pretrained.py
@@ -17,5 +17,5 @@
ner2id_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/ner2id.json')
rel_info_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/rel_info.json')

config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
4 changes: 2 additions & 2 deletions demo_api/lsr/model_card/lsr.json
@@ -1,7 +1,7 @@
{
    "name": "LSR",
    "languages": "English",
    "description": "This is a neural network that induces a latent document-level graph and uses a refinement strategy that allows the model to incrementally aggregate relevant information for multi-hop reasoning.",
    "description": "This is a neural network that induces a latent document-level graph and uses a refinement strategy that allows the model to incrementally aggregate relevant information for multi-hop reasoning. This particular model corresponds to the GloVe+LSR model described in the paper.",
    "paper": {
        "text": "Nan, G., Guo, Z., Sekulić, I., & Lu, W. (2020). Reasoning with Latent Structure Refinement for Document-Level Relation Extraction. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, July 2020 (pp. 1546-1557).",
        "url": "https://aclanthology.org/2020.acl-main.141/"
@@ -14,7 +14,7 @@
"text": "DocRED",
"url": "https://github.com/thunlp/DocRED/tree/master/data"
},
"evaluationScores": "0.55 F1 on development set. 0.59 F1 reported by authors in paper on development set.",
"evaluationScores": "0.55 F1 on development set. 0.55 F1 reported by authors in paper on development set.",
"trainingConfig": {
"text": "Not available."
},
2 changes: 1 addition & 1 deletion demo_api/lsr/requirements.txt
@@ -7,4 +7,4 @@ transformers
textdistance==4.2.1
flask
gunicorn
sgnlp==0.0.1
sgnlp==0.1.0
8 changes: 3 additions & 5 deletions demo_api/lsr/usage.py
@@ -13,8 +13,8 @@
pred_threshold=PRED_THRESHOLD)

# Load model
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
model.eval()

# DocRED-like instance
@@ -90,6 +90,4 @@
tensor_doc = preprocessor([instance])
output = model(**tensor_doc)

result = postprocessor(output.prediction[0], instance)
print(result)

result = postprocessor(output.prediction, [instance])
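
The updated postprocessor call takes the full prediction together with a list of documents and appears to return one result per document. Below is a small sketch of how a caller would recover the single-instance result, inferred from the matching change in demo_api/lsr/api.py above; the indexing and print are illustrative, not part of usage.py:

```
# Sketch only: the postprocessor now returns one entry per input document,
# so index into it for a single instance (inferred from demo_api/lsr/api.py).
results = postprocessor(output.prediction, [instance])
result = results[0]
print(result)
```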