forked from Samagra-Development/ai-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'Samagra-Development:restructure' into restructure
- Loading branch information
Showing
10 changed files
with
157 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# All subsequent paths (COPY destinations, CMD working dir) resolve under /app.
WORKDIR /app


# Install dependencies first, in their own layer, so Docker's build cache
# is reused until requirements.txt itself changes.
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt

# Copy the rest of the application code to the working directory
COPY . /app/
EXPOSE 8000
# Set the entrypoint for the container.
# NOTE(review): hypercorn is not pinned in requirements.txt — it is only
# installed as a transitive dependency of quart; confirm it stays available.
CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
## NER: | ||
|
||
|
||
### Purpose : | ||
Model to detect | ||
- crops | ||
- pests | ||
- seed type | ||
|
||
|
||
### Testing the model deployment : | ||
To test just the Hugging Face deployment for grievance recognition, follow these steps: | ||
|
||
- Git clone the repo | ||
- Go to current folder location i.e. ``` cd /src/ner/agri_ner_akai/local ``` | ||
- Create docker image file and test the api: | ||
``` | ||
docker build -t testmodel . | ||
docker run -p 8000:8000 testmodel | ||
curl -X POST -H "Content-Type: application/json" -d '{"text": "What are tomatoes and potaotes that are being attacked by aphids? "}' http://localhost:8000/ | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .request import ModelRequest
# Fix: Model is defined in model.py, not request.py — the old
# `from .request import Model` raised ImportError at package import time
# (request.py only defines ModelRequest; api.py imports Model from model).
from .model import Model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from model import Model
from request import ModelRequest
from quart import Quart, request, jsonify
import aiohttp

app = Quart(__name__)

# Module-level singleton; populated once the server starts serving.
model = None


@app.before_serving
async def startup():
    """Create the shared HTTP session and load the NER model before serving."""
    app.client = aiohttp.ClientSession()
    global model
    model = Model(app)


@app.after_serving
async def shutdown():
    """Close the shared aiohttp session on shutdown.

    Fix: the ClientSession created in startup() was never closed, leaking
    its connector's sockets and triggering aiohttp's "Unclosed client
    session" warning when the server stops.
    """
    await app.client.close()


@app.route('/', methods=['POST'])
async def embed():
    """POST a JSON body like {"text": "..."}; returns the list of entities.

    The request body is unpacked into ModelRequest, run through the model,
    and the aggregated entities are returned as JSON.
    """
    global model
    data = await request.get_json()
    req = ModelRequest(**data)
    entities = await model.inference(req)
    return jsonify(entities)  # Convert the list of entities to JSON format


if __name__ == "__main__":
    app.run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from transformers import pipeline | ||
from request import ModelRequest | ||
|
||
class Model():
    """Singleton wrapper around a Hugging Face token-classification pipeline.

    Loads the "GautamR/akai_ner" NER model once and merges its per-token
    output back into word-level entity dicts.
    """

    def __new__(cls, context):
        # Singleton: the (expensive) pipeline is created only on first call
        # and shared by every subsequent Model(...) construction.
        cls.context = context
        if not hasattr(cls, 'instance'):
            cls.instance = super(Model, cls).__new__(cls)
            cls.nlp_ner = pipeline("ner", model="GautamR/akai_ner", tokenizer="GautamR/akai_ner")
        return cls.instance

    async def inference(self, request: "ModelRequest"):
        """Run NER over request.text and return aggregated word-level entities."""
        entities = self.nlp_ner(request.text)
        return self.aggregate_entities(request.text, entities)

    @staticmethod
    def aggregate_entities(sentence, entity_outputs):
        """Merge token-level pipeline output into word-level entities.

        Each input item is a pipeline dict with "entity" (e.g. "B-PEST"),
        "word" ("##"-prefixed for subword tokens), "score", "start", "end".
        Returns a list of dicts with keys entity_group/score/word/start/end.
        """
        aggregated_entities = []
        current_entity = None

        for entity in entity_outputs:
            entity_type = entity["entity"].split("-")[-1]

            # Handle subwords
            if entity["word"].startswith("##"):
                # If we encounter an I-PEST or any other I- entity
                if "I-" in entity["entity"]:
                    if current_entity:  # Add previous entity
                        aggregated_entities.append(current_entity)

                    # Recover the full surrounding word from the sentence by
                    # expanding to the nearest space on either side.
                    word_start = sentence.rfind(" ", 0, entity["start"]) + 1
                    word_end = sentence.find(" ", entity["end"])
                    if word_end == -1:
                        word_end = len(sentence)

                    current_entity = {
                        "entity_group": entity_type,
                        "score": float(entity["score"]),
                        "word": sentence[word_start:word_end].replace('.', '').replace('?', ''),
                        "start": float(word_start),
                        "end": float(word_end)
                    }
                    aggregated_entities.append(current_entity)
                    current_entity = None

                elif current_entity is not None:
                    # Continuation subword: extend the open entity in place.
                    current_entity["word"] += entity["word"][2:]
                    current_entity["end"] = entity["end"]
                    current_entity["score"] = float((current_entity["score"] + entity["score"]) / 2)  # averaging scores

                else:
                    # FIX: an orphan non-I subword with no open entity used to
                    # crash ("'NoneType' object is not subscriptable"); start a
                    # fresh entity from the subword text instead.
                    current_entity = {
                        "entity_group": entity_type,
                        "score": float(entity["score"]),
                        "word": entity["word"][2:],
                        "start": float(entity["start"]),
                        "end": float(entity["end"])
                    }

            # Handle full words
            else:
                if current_entity:
                    aggregated_entities.append(current_entity)

                current_entity = {
                    "entity_group": entity_type,
                    "score": float(entity["score"]),
                    "word": entity["word"],
                    "start": float(entity["start"]),
                    "end": float(entity["end"])
                }

        # Flush the last open entity, if any.
        if current_entity:
            aggregated_entities.append(current_entity)

        return aggregated_entities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import requests | ||
import json | ||
|
||
|
||
class ModelRequest():
    """Payload for a NER inference call: wraps the raw input text."""

    def __init__(self, text):
        # The sentence to run entity extraction on.
        self.text = text

    def to_json(self):
        """Serialize this request as a pretty-printed, key-sorted JSON string."""
        return json.dumps(
            self,
            default=lambda obj: vars(obj),
            sort_keys=True,
            indent=4,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu | ||
transformers | ||
quart | ||
aiohttp |