Commit: 0.1.0 release
jonheng authored Aug 26, 2021
2 parents 44ae12a + f5cb31b commit 8cca183
Showing 148 changed files with 546 additions and 5,018 deletions.
2 changes: 2 additions & 0 deletions .gitlab-ci.yml
@@ -13,6 +13,7 @@ run_non_slow_unit_tests:
- apt-get -y install build-essential
- apt-get update
- pip install -e .
- pip install -r requirements_extra.txt
- pip install -U pytest
- python -m nltk.downloader punkt
- echo 'Execute not slow unit tests'
@@ -28,6 +29,7 @@ run_slow_unit_tests:
- apt-get -y install build-essential
- apt-get update
- pip install -e .
- pip install -r requirements_extra.txt
- pip install -U pytest
- python -m nltk.downloader punkt
- echo 'Execute not slow unit tests'
4 changes: 2 additions & 2 deletions README.md
@@ -5,7 +5,7 @@ Machine learning models from Singapore's natural language processing (NLP) resea
`sgnlp` is a Python package that allows you to easily get started with various NLP models implemented using the
PyTorch and Transformers frameworks.

We have an accompanying [demo site](https://sgnlp.aks.aisingapore.net/) where you can interact with our models and get a
We have an accompanying [demo site](https://sgnlp.aisingapore.net/) where you can interact with our models and get a
better understanding of how they work.

## Installation
@@ -18,7 +18,7 @@ pip install sgnlp

## Documentation

Visit our [documentation](https://sgnlp.aks.aisingapore.net/docs/) for tutorials.
Visit our [documentation](https://sgnlp.aisingapore.net/docs/) for tutorials.

## License

11 changes: 11 additions & 0 deletions demo_api/README.md
@@ -0,0 +1,11 @@
## Building, running, and pushing image
```
# From root folder of repository:
docker build -t <model_name> -f demo_api/<model_name>/Dockerfile demo_api/
docker run -p 8000:8000 <model_name>
# E.g.
docker build -t lsr -f demo_api/lsr/Dockerfile demo_api/
docker run -p 8000:8000 lsr
```
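
Once a container is running, the common routes introduced in this release give a quick way to sanity-check it. Below is a minimal sketch using the `requests` package (not part of this commit) against the `/healthz` and `/model-card` endpoints exposed on port 8000:

```
# Sketch only: quick sanity check against a demo container started with
# `docker run -p 8000:8000 <model_name>`; assumes the `requests` package is installed.
import requests

health = requests.get("http://localhost:8000/healthz")
print(health.status_code, health.json())  # expect 200 and {"healthy": true}

card = requests.get("http://localhost:8000/model-card")
print(card.json())  # model card JSON served by the shared /model-card route
```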
40 changes: 40 additions & 0 deletions demo_api/common.py
@@ -0,0 +1,40 @@
import logging
import json
from flask import Flask, jsonify, make_response


def create_api(app_name, model_card_path, model_usage_path="usage.py"):
    app = Flask(app_name)

    # setup gunicorn logging
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)

    # Common routes
    @app.route("/model-card", methods=["GET"])
    def get_model_card():
        """GET method for model card
        Returns:
            json: model card in json format
        """
        with open(model_card_path) as f:
            model_card = json.load(f)
        return jsonify(**model_card)

    @app.route("/model-usage", methods=["GET"])
    def get_model_usage():
        try:
            with open(model_usage_path) as f:
                model_usage = f.read()
            return jsonify(usage=model_usage)
        except FileNotFoundError:
            return make_response("Model usage not available.", 404)

    # Kubernetes health check endpoint
    @app.route("/healthz", methods=["GET"])
    def healthz():
        return make_response(jsonify({"healthy": True}), 200)

    return app
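
Each demo's api.py is now expected to build its Flask app through this helper, and gunicorn then serves that module via the `wsgi_app = "api:app"` setting in demo_api/gunicorn.conf.py. A minimal sketch of the pattern follows; the model card path and the predict body are placeholders, not code from this commit (the real calls appear in the emotion_entailment and lsr diffs further down):

```
# Sketch only: how a demo-specific api.py is expected to wire into create_api.
# "model_card/example_model.json" and the predict body are placeholders.
from flask import request

from demo_api.common import create_api

app = create_api(app_name=__name__, model_card_path="model_card/example_model.json")


@app.route("/predict", methods=["POST"])
def predict():
    req = request.get_json()
    # ... run the model on `req` and build a JSON-serialisable response ...
    return {"input_received": req}


if __name__ == "__main__":
    app.run()
```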
7 changes: 3 additions & 4 deletions demo_api/emotion_entailment/Dockerfile
@@ -1,11 +1,10 @@
FROM python:3.8-buster

COPY . /emotion_entailment
COPY . /demo_api

WORKDIR /emotion_entailment
WORKDIR /demo_api/emotion_entailment

RUN pip install -r requirements.txt

RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
12 changes: 0 additions & 12 deletions demo_api/emotion_entailment/README.md

This file was deleted.

demo_api/emotion_entailment/model_api.py → demo_api/emotion_entailment/api.py
@@ -1,9 +1,7 @@
import json
from flask import request, jsonify

import torch
import numpy as np
from flask import Flask, request, jsonify

from demo_api.common import create_api
from sgnlp.models.emotion_entailment import (
    RecconEmotionEntailmentConfig,
    RecconEmotionEntailmentTokenizer,
@@ -15,10 +13,7 @@
    get_all_evidence_utterance_from_conversation,
)


app = Flask(__name__)

model_card_path = "model_card/emotion_entailment.json"
app = create_api(app_name=__name__, model_card_path="model_card/emotion_entailment.json")

config = RecconEmotionEntailmentConfig.from_pretrained(
"https://sgnlp.blob.core.windows.net/models/reccon_emotion_entailment/config.json"
@@ -32,18 +27,6 @@
postprocessor = RecconEmotionEntailmentPostprocessor()


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)


@app.route("/predict", methods=["POST"])
def predict():
"""Iterate through each evidence utt in context to perform RECCON emotion entailment.
@@ -100,4 +83,4 @@ def predict():


if __name__ == "__main__":
app.run(host="0.0.0.0")
app.run()
2 changes: 1 addition & 1 deletion demo_api/emotion_entailment/requirements.txt
@@ -6,4 +6,4 @@ transformers==4.4.2
tokenizers==0.10.1
flask
gunicorn
sgnlp==0.0.1
sgnlp==0.1.0
4 changes: 0 additions & 4 deletions demo_api/emotion_entailment/wsgi.py

This file was deleted.

6 changes: 6 additions & 0 deletions demo_api/gunicorn.conf.py
@@ -0,0 +1,6 @@
bind = "0.0.0.0:8000"
wsgi_app = "api:app"
timeout = 180
workers = 4
preload_app = True
raw_env = ["PYTHONPATH=../../", "TOKENIZERS_PARALLELISM=false"]
6 changes: 3 additions & 3 deletions demo_api/lif_3way_ap/Dockerfile
@@ -1,11 +1,11 @@
FROM python:3.8-buster

COPY . /lif_3way_ap/
COPY . /demo_api

WORKDIR /lif_3way_ap
WORKDIR /demo_api/lif_3way_ap

RUN pip install -r requirements.txt
RUN python -m spacy download en_core_web_sm
RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
11 changes: 0 additions & 11 deletions demo_api/lif_3way_ap/README.md

This file was deleted.

26 changes: 6 additions & 20 deletions demo_api/lif_3way_ap/model_api.py → demo_api/lif_3way_ap/api.py
@@ -1,11 +1,12 @@
import json
import logging
from flask import Flask, request, jsonify
from flask import request
from transformers import cached_path

from demo_api.common import create_api
from sgnlp.models.lif_3way_ap import LIF3WayAPModel, LIF3WayAPConfig, LIF3WayAPPreprocessor
from transformers import cached_path

app = Flask(__name__)

app = create_api(app_name=__name__, model_card_path="model_card/lif_3way_ap.json")

gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
@@ -35,20 +36,5 @@ def predict():
return {"probability": output["label_probs"].item()}


model_card_path = "model_card/lif_3way_ap.json"


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)


if __name__ == '__main__':
    app.run(host='0.0.0.0')
    app.run()
4 changes: 0 additions & 4 deletions demo_api/lif_3way_ap/wsgi.py

This file was deleted.

7 changes: 3 additions & 4 deletions demo_api/lsr/Dockerfile
@@ -1,11 +1,10 @@
FROM python:3.8-buster

COPY . /lsr/
COPY . /demo_api

WORKDIR /lsr
WORKDIR /demo_api/lsr

RUN pip install -r requirements.txt

RUN python -m download_pretrained

CMD gunicorn --bind 0.0.0.0:8000 wsgi:app --timeout 180
CMD PYTHONPATH=../../ gunicorn -c ../gunicorn.conf.py
11 changes: 0 additions & 11 deletions demo_api/lsr/README.md

This file was deleted.

34 changes: 7 additions & 27 deletions demo_api/lsr/model_api.py → demo_api/lsr/api.py
@@ -1,16 +1,11 @@
import json
import logging
from flask import Flask, request, jsonify
from flask import request
from transformers import cached_path

from demo_api.common import create_api
from sgnlp.models.lsr import LsrModel, LsrConfig, LsrPreprocessor, LsrPostprocessor
from text_input_to_docred_pipeline import TextInputToDocredPipeline

app = Flask(__name__)

gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
app.logger.setLevel(gunicorn_logger.level)
app = create_api(app_name=__name__, model_card_path="model_card/lsr.json")

# Download files from azure blob storage
rel2id_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/rel2id.json')
@@ -28,8 +23,8 @@
pred_threshold=PRED_THRESHOLD)

# Load model
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
model.eval()

app.logger.info('Preprocessing pipeline and model initialization complete.')
@@ -53,23 +48,8 @@ def predict():
    else:
        tensor_doc = preprocessor([docred_doc])
        output = model(**tensor_doc)
        return postprocessor(output.prediction[0], docred_doc)


model_card_path = "model_card/lsr.json"


@app.route("/model-card", methods=["GET"])
def get_model_card():
"""GET method for model card
Returns:
json: return model card in json format
"""
with open(model_card_path) as f:
model_card = json.load(f)
return jsonify(**model_card)
        return postprocessor(output.prediction, [docred_doc])[0]


if __name__ == '__main__':
    app.run(host='0.0.0.0')
    app.run()
4 changes: 2 additions & 2 deletions demo_api/lsr/download_pretrained.py
@@ -17,5 +17,5 @@
ner2id_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/ner2id.json')
rel_info_path = cached_path('https://sgnlp.blob.core.windows.net/models/lsr/rel_info.json')

config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
4 changes: 2 additions & 2 deletions demo_api/lsr/model_card/lsr.json
@@ -1,7 +1,7 @@
{
    "name": "LSR",
    "languages": "English",
    "description": "This is a neural network that induces a latent document-level graph and uses a refinement strategy that allows the model to incrementally aggregate relevant information for multi-hop reasoning.",
    "description": "This is a neural network that induces a latent document-level graph and uses a refinement strategy that allows the model to incrementally aggregate relevant information for multi-hop reasoning. This particular model corresponds to the GloVe+LSR model described in the paper.",
    "paper": {
        "text": "Nan, G., Guo, Z., Sekulić, I., & Lu, W. (2020). Reasoning with Latent Structure Refinement for Document-Level Relation Extraction. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, July 2020 (pp. 1546-1557).",
        "url": "https://aclanthology.org/2020.acl-main.141/"
@@ -14,7 +14,7 @@
"text": "DocRED",
"url": "https://github.com/thunlp/DocRED/tree/master/data"
},
"evaluationScores": "0.55 F1 on development set. 0.59 F1 reported by authors in paper on development set.",
"evaluationScores": "0.55 F1 on development set. 0.55 F1 reported by authors in paper on development set.",
"trainingConfig": {
"text": "Not available."
},
2 changes: 1 addition & 1 deletion demo_api/lsr/requirements.txt
@@ -7,4 +7,4 @@ transformers
textdistance==4.2.1
flask
gunicorn
sgnlp==0.0.1
sgnlp==0.1.0
8 changes: 3 additions & 5 deletions demo_api/lsr/usage.py
@@ -13,8 +13,8 @@
pred_threshold=PRED_THRESHOLD)

# Load model
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/pytorch_model.bin', config=config)
config = LsrConfig.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/config.json')
model = LsrModel.from_pretrained('https://sgnlp.blob.core.windows.net/models/lsr/v2/pytorch_model.bin', config=config)
model.eval()

# DocRED-like instance
@@ -90,6 +90,4 @@
tensor_doc = preprocessor([instance])
output = model(**tensor_doc)

result = postprocessor(output.prediction[0], instance)
print(result)

result = postprocessor(output.prediction, [instance])
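
The updated postprocessor call takes the full prediction together with a list of documents and appears to return one result per document. Below is a small sketch of how a caller would recover the single-instance result, inferred from the matching change in demo_api/lsr/api.py above; the indexing and print are illustrative, not part of usage.py:

```
# Sketch only: the postprocessor now returns one entry per input document,
# so index into it for a single instance (inferred from demo_api/lsr/api.py).
results = postprocessor(output.prediction, [instance])
result = results[0]
print(result)
```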