diff --git a/config.json b/config.json
index 1ebcb16..418f74d 100644
--- a/config.json
+++ b/config.json
@@ -1,13 +1,22 @@
 {
     "models": [
         {
+            "serviceName": "ner",
+            "modelBasePath": "src/ner/agri_ner_akai/local/.",
+            "apiBasePath": "ner/agri_ner_akai/local/",
+            "containerPort": 8000,
+            "environment": {},
+            "nginx": [],
+            "build": true
+        },
+        {
             "serviceName": "word_score",
             "modelBasePath": "src/search/word_score/local/.",
-            "apiBasePath": "/search/word_score/local",
+            "apiBasePath": "/search/word_score/local/",
             "containerPort": 8000,
             "environment": {},
             "nginx": [],
-            "build": false
+            "build": true
         },
         {
             "serviceName": "spell_check",
diff --git a/src/ner/README.md b/src/ner/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/src/ner/agri_ner_akai/README.md b/src/ner/agri_ner_akai/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/src/ner/agri_ner_akai/local/Dockerfile b/src/ner/agri_ner_akai/local/Dockerfile
new file mode 100644
index 0000000..97897b3
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/Dockerfile
@@ -0,0 +1,15 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+WORKDIR /app
+
+
+#install requirements
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+EXPOSE 8000
+# Set the entrypoint for the container
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/ner/agri_ner_akai/local/README.md b/src/ner/agri_ner_akai/local/README.md
new file mode 100644
index 0000000..5c5a066
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/README.md
@@ -0,0 +1,21 @@
+## NER:
+
+
+### Purpose :
+Model to detect
+- crops
+- pests
+- seed type
+
+
+### Testing the model deployment :
+To test just the Hugging Face deployment for named entity recognition, follow these steps:
+
+- Git clone the repo
+- Go to the current folder location, i.e. ``` cd src/ner/agri_ner_akai/local ```
+- Create the docker image and test the API:
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -H "Content-Type: application/json" -d '{"text": "What are tomatoes and potatoes that are being attacked by aphids? "}' http://localhost:8000/
+```
diff --git a/src/ner/agri_ner_akai/local/__init__.py b/src/ner/agri_ner_akai/local/__init__.py
new file mode 100644
index 0000000..7faa07a
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/__init__.py
@@ -0,0 +1,2 @@
+from .request import ModelRequest
+from .model import Model
diff --git a/src/ner/agri_ner_akai/local/api.py b/src/ner/agri_ner_akai/local/api.py
new file mode 100644
index 0000000..3968bb6
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/api.py
@@ -0,0 +1,41 @@
+from model import Model
+from request import ModelRequest
+from quart import Quart, request, jsonify
+import aiohttp
+
+app = Quart(__name__)
+
+model = None
+
+
+@app.before_serving
+async def startup():
+    """Create the shared HTTP session and build the NER model before serving."""
+    global model
+    app.client = aiohttp.ClientSession()
+    model = Model(app)
+
+
+@app.after_serving
+async def shutdown():
+    """Close the session opened in startup() so it is not leaked."""
+    await app.client.close()
+
+
+@app.route('/', methods=['POST'])
+async def embed():
+    """Run NER on the posted text and return the entities as JSON.
+
+    Expects a JSON object with a string ``text`` field; anything else is
+    answered with HTTP 400 instead of failing inside ModelRequest.
+    """
+    data = await request.get_json()
+    if not isinstance(data, dict) or not isinstance(data.get("text"), str):
+        return jsonify({"error": "Request body must be a JSON object with a string 'text' field."}), 400
+    req = ModelRequest(text=data["text"])
+    entities = await model.inference(req)
+    return jsonify(entities)  # Convert the list of entities to JSON format
+
+
+if __name__ == "__main__":
+    app.run()
\ No newline at end of file
diff --git a/src/ner/agri_ner_akai/local/model.py b/src/ner/agri_ner_akai/local/model.py
new file mode 100644
index 0000000..4c1cdaa
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/model.py
@@ -0,0 +1,90 @@
+from transformers import pipeline
+from request import ModelRequest
+
+
+class Model():
+    """Singleton wrapper around the GautamR/akai_ner "ner" pipeline."""
+
+    def __new__(cls, context):
+        cls.context = context
+        # Build the pipeline only on first construction; later calls reuse it.
+        if not hasattr(cls, 'instance'):
+            cls.instance = super(Model, cls).__new__(cls)
+            cls.nlp_ner = pipeline("ner", model="GautamR/akai_ner", tokenizer="GautamR/akai_ner")
+        return cls.instance
+
+    async def inference(self, request: ModelRequest):
+        """Tag request.text and return the aggregated entity list."""
+        entities = self.nlp_ner(request.text)
+        return self.aggregate_entities(request.text, entities)
+
+    @staticmethod
+    def aggregate_entities(sentence, entity_outputs):
+        """Merge token-level pipeline output into a list of entity dicts.
+
+        Each result has "entity_group", "score", "word", "start" and "end";
+        score and offsets are converted to plain floats.
+        """
+        aggregated_entities = []
+        current_entity = None
+
+        for entity in entity_outputs:
+            entity_type = entity["entity"].split("-")[-1]
+
+            # Handle subwords
+            if entity["word"].startswith("##"):
+                # If we encounter an I-PEST or any other I- entity
+                if "I-" in entity["entity"]:
+                    if current_entity:  # Add previous entity
+                        aggregated_entities.append(current_entity)
+
+                    word_start = sentence.rfind(" ", 0, entity["start"]) + 1
+                    word_end = sentence.find(" ", entity["end"])
+                    if word_end == -1:
+                        word_end = len(sentence)
+
+                    current_entity = {
+                        "entity_group": entity_type,
+                        "score": float(entity["score"]),
+                        "word": sentence[word_start:word_end].replace('.','').replace('?',''),
+                        "start": float(word_start),
+                        "end": float(word_end)
+                    }
+                    aggregated_entities.append(current_entity)
+                    current_entity = None
+
+                elif current_entity is None:
+                    # Subword with nothing to extend (first token, or right after
+                    # an I- subword cleared current_entity): start a new entity
+                    # instead of raising TypeError on None.
+                    current_entity = {
+                        "entity_group": entity_type,
+                        "score": float(entity["score"]),
+                        "word": entity["word"][2:],
+                        "start": float(entity["start"]),
+                        "end": float(entity["end"])
+                    }
+
+                else:
+                    # If it's a subword but not an I- entity
+                    current_entity["word"] += entity["word"][2:]
+                    current_entity["end"] = float(entity["end"])  # keep offsets float like the other branches
+                    current_entity["score"] = float((current_entity["score"] + entity["score"]) / 2)  # averaging scores
+
+            # Handle full words
+            else:
+                if current_entity:
+                    aggregated_entities.append(current_entity)
+
+                current_entity = {
+                    "entity_group": entity_type,
+                    "score": float(entity["score"]),
+                    "word": entity["word"],
+                    "start": float(entity["start"]),
+                    "end": float(entity["end"])
+                }
+
+        if current_entity:
+            aggregated_entities.append(current_entity)
+
+        return aggregated_entities
diff --git a/src/ner/agri_ner_akai/local/request.py b/src/ner/agri_ner_akai/local/request.py
new file mode 100644
index 0000000..918b8c2
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/request.py
@@ -0,0 +1,14 @@
+import requests
+import json
+
+
+class ModelRequest():
+    """Request payload holding the text to run NER on."""
+
+    def __init__(self, text):
+        self.text = text
+
+    def to_json(self):
+        """Return this object's attributes as an indented, key-sorted JSON string."""
+        return json.dumps(self, default=lambda o: o.__dict__,
+                          sort_keys=True, indent=4)
\ No newline at end of file
diff --git a/src/ner/agri_ner_akai/local/requirements.txt b/src/ner/agri_ner_akai/local/requirements.txt
new file mode 100644
index 0000000..2cb2afc
--- /dev/null
+++ b/src/ner/agri_ner_akai/local/requirements.txt
@@ -0,0 +1,7 @@
+# --index-url is a file-wide pip option, not a per-requirement one, so the CPU
+# wheel index gets its own line (extra, so the other packages still use PyPI).
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch==2.0.1
+transformers
+quart
+aiohttp
\ No newline at end of file